Whamcloud - gitweb
LU-3259 clio: cl_lock simplification 58/10858/15
author Jinshan Xiong <jinshan.xiong@intel.com>
Fri, 26 Sep 2014 21:46:30 +0000 (14:46 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 4 Nov 2014 17:52:13 +0000 (17:52 +0000)
In this patch, the cl_lock cache is eliminated. cl_lock becomes a
cacheless data container that describes the lock requirements needed
to complete an I/O. A cl_lock is created before the I/O starts and
destroyed once the I/O is complete.
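
With the cache gone, struct cl_lock shrinks to the small per-I/O
container introduced in the lustre/include/cl_object.h hunk below,
reproduced here for quick reference:

    struct cl_lock {
            /** List of slices. Immutable after creation. */
            struct list_head      cll_layers;
            /** lock attribute, extent, cl_object, etc. */
            struct cl_lock_descr  cll_descr;
    };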

cl_lock depends on an LDLM lock to fulfill lock semantics; the LDLM
lock is attached to the cl_lock at the OSC layer. LDLM locks remain
cacheable.

Two major methods are supported for cl_lock: clo_enqueue and
clo_cancel. A cl_lock is enqueued by cl_lock_request(), which calls
the clo_enqueue() method of each layer to enqueue the lock. At the
LOV layer, if a cl_lock consists of multiple sub cl_locks, each
sub-lock is enqueued correspondingly. At the OSC layer, the enqueue
request first tries to reuse a cached LDLM lock; otherwise a new
LDLM lock is requested from the OST.
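
As a condensed illustration of the enqueue path, the reworked
cl_glimpse_lock() in the lustre/llite/glimpse.c hunk below boils down
to roughly the following (error handling and comments trimmed):

    struct cl_lock *lock = ccc_env_lock(env);  /* zeroed per-env cl_lock */
    struct cl_lock_descr *descr = &lock->cll_descr;

    *descr = whole_file;                       /* extent the I/O needs */
    descr->cld_obj       = clob;
    descr->cld_mode      = CLM_READ;
    descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;

    result = cl_lock_request(env, io, lock);   /* clo_enqueue() on each layer */
    if (result < 0)
            RETURN(result);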

cl_lock_cancel() must be called to release a cl_lock after use. The
clo_cancel() method is called for each layer to release the resources
held by the lock. At the OSC layer, the reference on the LDLM lock,
which was taken at clo_enqueue time, is released.

An LDLM lock can only be canceled when no cl_lock is using it.
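
On the release side the same path simply calls cl_lock_release() once
the I/O is done. A minimal sketch of the intended lifecycle; the
lustre/obdclass/cl_lock.c internals are not shown on this page, so it
is assumed here that cl_lock_release() drives cl_lock_cancel() and
then cl_lock_fini():

    /* ... I/O covered by the lock has completed ... */
    cl_lock_release(env, lock);  /* assumed: cl_lock_cancel() runs each layer's
                                  * clo_cancel(); at OSC this drops the LDLM
                                  * lock reference taken at enqueue time, and
                                  * cl_lock_fini() then frees the slices */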

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: I6a61250549cfbc28070fe4bb7789ba7429eaf089
Reviewed-on: http://review.whamcloud.com/10858
Tested-by: Jenkins
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
34 files changed:
lustre/include/cl_object.h
lustre/include/lclient.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/llite/glimpse.c
lustre/llite/lcommon_cl.c
lustre/llite/lcommon_misc.c
lustre/llite/rw26.c
lustre/llite/vvp_io.c
lustre/llite/vvp_lock.c
lustre/llite/vvp_object.c
lustre/lov/lov_cl_internal.h
lustre/lov/lov_dev.c
lustre/lov/lov_lock.c
lustre/lov/lov_object.c
lustre/lov/lovsub_lock.c
lustre/obdclass/cl_io.c
lustre/obdclass/cl_lock.c
lustre/obdclass/cl_object.c
lustre/obdclass/cl_page.c
lustre/obdecho/echo_client.c
lustre/osc/osc_cache.c
lustre/osc/osc_cl_internal.h
lustre/osc/osc_internal.h
lustre/osc/osc_io.c
lustre/osc/osc_lock.c
lustre/osc/osc_object.c
lustre/osc/osc_page.c
lustre/osc/osc_request.c
lustre/tests/sanityn.sh

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index 0b574da..0c9bed8 100644
@@ -82,7 +82,6 @@
  *  - i_mutex
  *      - PG_locked
  *          - cl_object_header::coh_page_guard
- *          - cl_object_header::coh_lock_guard
  *          - lu_site::ls_guard
  *
  * See the top comment in cl_object.c for the description of overall locking and
@@ -404,16 +403,6 @@ struct cl_object_header {
         /** Standard lu_object_header. cl_object::co_lu::lo_header points
          * here. */
        struct lu_object_header coh_lu;
-        /** \name locks
-         * \todo XXX move locks below to the separate cache-lines, they are
-         * mostly useless otherwise.
-         */
-        /** @{ */
-       /** Lock protecting lock list. */
-       spinlock_t              coh_lock_guard;
-       /** @} locks */
-       /** List of cl_lock's granted for this object. */
-       struct list_head        coh_locks;
 
         /**
          * Parent object. It is assumed that an object has a well-defined
@@ -773,18 +762,11 @@ struct cl_page_slice {
 /**
  * Lock mode. For the client extent locks.
  *
- * \warning: cl_lock_mode_match() assumes particular ordering here.
  * \ingroup cl_lock
  */
 enum cl_lock_mode {
-        /**
-         * Mode of a lock that protects no data, and exists only as a
-         * placeholder. This is used for `glimpse' requests. A phantom lock
-         * might get promoted to real lock at some point.
-         */
-        CLM_PHANTOM,
-        CLM_READ,
-        CLM_WRITE,
+       CLM_READ,
+       CLM_WRITE,
        CLM_GROUP,
        CLM_MAX,
 };
@@ -1099,12 +1081,6 @@ static inline bool __page_in_use(const struct cl_page *page, int refc)
  * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
  * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
  *
- * All locks for a given object are linked into cl_object_header::coh_locks
- * list (protected by cl_object_header::coh_lock_guard spin-lock) through
- * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
- * sort it in starting lock offset, or use altogether different data structure
- * like a tree.
- *
  * Typical cl_lock consists of the two layers:
  *
  *     - vvp_lock (vvp specific data), and
@@ -1305,289 +1281,21 @@ struct cl_lock_descr {
         __u32             cld_enq_flags;
 };
 
-#define DDESCR "%s(%d):[%lu, %lu]"
-#define PDESCR(descr)                                                   \
-        cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode,        \
-        (descr)->cld_start, (descr)->cld_end
+#define DDESCR "%s(%d):[%lu, %lu]:%x"
+#define PDESCR(descr)                                                  \
+       cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode,        \
+       (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
 
 const char *cl_lock_mode_name(const enum cl_lock_mode mode);
 
 /**
- * Lock state-machine states.
- *
- * \htmlonly
- * <pre>
- *
- * Possible state transitions:
- *
- *              +------------------>NEW
- *              |                    |
- *              |                    | cl_enqueue_try()
- *              |                    |
- *              |    cl_unuse_try()  V
- *              |  +--------------QUEUING (*)
- *              |  |                 |
- *              |  |                 | cl_enqueue_try()
- *              |  |                 |
- *              |  | cl_unuse_try()  V
- *    sub-lock  |  +-------------ENQUEUED (*)
- *    canceled  |  |                 |
- *              |  |                 | cl_wait_try()
- *              |  |                 |
- *              |  |                (R)
- *              |  |                 |
- *              |  |                 V
- *              |  |                HELD<---------+
- *              |  |                 |            |
- *              |  |                 |            | cl_use_try()
- *              |  |  cl_unuse_try() |            |
- *              |  |                 |            |
- *              |  |                 V         ---+ 
- *              |  +------------>INTRANSIT (D) <--+
- *              |                    |            |
- *              |     cl_unuse_try() |            | cached lock found
- *              |                    |            | cl_use_try()
- *              |                    |            |
- *              |                    V            |
- *              +------------------CACHED---------+
- *                                   |
- *                                  (C)
- *                                   |
- *                                   V
- *                                FREEING
- *
- * Legend:
- *
- *         In states marked with (*) transition to the same state (i.e., a loop
- *         in the diagram) is possible.
- *
- *         (R) is the point where Receive call-back is invoked: it allows layers
- *         to handle arrival of lock reply.
- *
- *         (C) is the point where Cancellation call-back is invoked.
- *
- *         (D) is the transit state which means the lock is changing.
- *
- *         Transition to FREEING state is possible from any other state in the
- *         diagram in case of unrecoverable error.
- * </pre>
- * \endhtmlonly
- *
- * These states are for individual cl_lock object. Top-lock and its sub-locks
- * can be in the different states. Another way to say this is that we have
- * nested state-machines.
- *
- * Separate QUEUING and ENQUEUED states are needed to support non-blocking
- * operation for locks with multiple sub-locks. Imagine lock on a file F, that
- * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
- * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
- * its completion and at last enqueue lock for S2, and wait for its
- * completion. In that case, top-lock is in QUEUING state while S0, S1 are
- * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
- * that in this case, sub-locks move from state to state, and top-lock remains
- * in the same state).
- */
-enum cl_lock_state {
-        /**
-         * Lock that wasn't yet enqueued
-         */
-        CLS_NEW,
-        /**
-         * Enqueue is in progress, blocking for some intermediate interaction
-         * with the other side.
-         */
-        CLS_QUEUING,
-        /**
-         * Lock is fully enqueued, waiting for server to reply when it is
-         * granted.
-         */
-        CLS_ENQUEUED,
-        /**
-         * Lock granted, actively used by some IO.
-         */
-        CLS_HELD,
-        /**
-         * This state is used to mark the lock is being used, or unused.
-         * We need this state because the lock may have several sublocks,
-         * so it's impossible to have an atomic way to bring all sublocks
-         * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
-         * at unuse case.
-         * If a thread is referring to a lock, and it sees the lock is in this
-         * state, it must wait for the lock.
-         * See state diagram for details.
-         */
-        CLS_INTRANSIT,
-        /**
-         * Lock granted, not used.
-         */
-        CLS_CACHED,
-        /**
-         * Lock is being destroyed.
-         */
-        CLS_FREEING,
-        CLS_NR
-};
-
-enum cl_lock_flags {
-        /**
-         * lock has been cancelled. This flag is never cleared once set (by
-         * cl_lock_cancel0()).
-         */
-        CLF_CANCELLED  = 1 << 0,
-        /** cancellation is pending for this lock. */
-        CLF_CANCELPEND = 1 << 1,
-        /** destruction is pending for this lock. */
-        CLF_DOOMED     = 1 << 2,
-        /** from enqueue RPC reply upcall. */
-        CLF_FROM_UPCALL= 1 << 3,
-};
-
-/**
- * Lock closure.
- *
- * Lock closure is a collection of locks (both top-locks and sub-locks) that
- * might be updated in a result of an operation on a certain lock (which lock
- * this is a closure of).
- *
- * Closures are needed to guarantee dead-lock freedom in the presence of
- *
- *     - nested state-machines (top-lock state-machine composed of sub-lock
- *       state-machines), and
- *
- *     - shared sub-locks.
- *
- * Specifically, many operations, such as lock enqueue, wait, unlock,
- * etc. start from a top-lock, and then operate on a sub-locks of this
- * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
- * of such operation, this change has to be propagated to all top-locks that
- * share this sub-lock. Obviously, no natural lock ordering (e.g.,
- * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
- * to be used. Lock closure systematizes this try-and-repeat logic.
- */
-struct cl_lock_closure {
-        /**
-         * Lock that is mutexed when closure construction is started. When
-         * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
-         * origin is released before waiting.
-         */
-        struct cl_lock   *clc_origin;
-        /**
-         * List of enclosed locks, so far. Locks are linked here through
-         * cl_lock::cll_inclosure.
-         */
-       struct list_head  clc_list;
-        /**
-         * True iff closure is in a `wait' mode. This determines what
-         * cl_lock_enclosure() does when a lock L to be added to the closure
-         * is currently mutexed by some other thread.
-         *
-         * If cl_lock_closure::clc_wait is not set, then closure construction
-         * fails with CLO_REPEAT immediately.
-         *
-         * In wait mode, cl_lock_enclosure() waits until next attempt to build
-         * a closure might succeed. To this end it releases an origin mutex
-         * (cl_lock_closure::clc_origin), that has to be the only lock mutex
-         * owned by the current thread, and then waits on L mutex (by grabbing
-         * it and immediately releasing), before returning CLO_REPEAT to the
-         * caller.
-         */
-        int               clc_wait;
-        /** Number of locks in the closure. */
-        int               clc_nr;
-};
-
-/**
  * Layered client lock.
  */
 struct cl_lock {
-       /** Reference counter. */
-       atomic_t                cll_ref;
        /** List of slices. Immutable after creation. */
-       struct list_head        cll_layers;
-       /**
-        * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
-        * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
-        */
-       struct list_head        cll_linkage;
-       /**
-        * Parameters of this lock. Protected by
-        * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
-        * cl_lock::cll_guard. Modified only on lock creation and in
-        * cl_lock_modify().
-        */
-        struct cl_lock_descr  cll_descr;
-        /** Protected by cl_lock::cll_guard. */
-       enum cl_lock_state    cll_state;
-       /** signals state changes. */
-       wait_queue_head_t     cll_wq;
-       /**
-        * Recursive lock, most fields in cl_lock{} are protected by this.
-        *
-        * Locking rules: this mutex is never held across network
-        * communication, except when lock is being canceled.
-        *
-        * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
-        * on a top-lock. Other direction is implemented through a
-        * try-lock-repeat loop. Mutices of unrelated locks can be taken only
-        * by try-locking.
-        *
-        * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
-        */
-       struct mutex            cll_guard;
-       struct task_struct    *cll_guarder;
-        int                   cll_depth;
-
-        /**
-         * the owner for INTRANSIT state
-         */
-       struct task_struct    *cll_intransit_owner;
-        int                   cll_error;
-        /**
-         * Number of holds on a lock. A hold prevents a lock from being
-         * canceled and destroyed. Protected by cl_lock::cll_guard.
-         *
-         * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
-         */
-        int                   cll_holds;
-         /**
-          * Number of lock users. Valid in cl_lock_state::CLS_HELD state
-          * only. Lock user pins lock in CLS_HELD state. Protected by
-          * cl_lock::cll_guard.
-          *
-          * \see cl_wait(), cl_unuse().
-          */
-        int                   cll_users;
-        /**
-         * Flag bit-mask. Values from enum cl_lock_flags. Updates are
-         * protected by cl_lock::cll_guard.
-         */
-        unsigned long         cll_flags;
-        /**
-         * A linkage into a list of locks in a closure.
-         *
-         * \see cl_lock_closure
-         */
-       struct list_head        cll_inclosure;
-        /**
-         * Confict lock at queuing time.
-         */
-        struct cl_lock       *cll_conflict;
-        /**
-         * A list of references to this lock, for debugging.
-         */
-        struct lu_ref         cll_reference;
-        /**
-         * A list of holds on this lock, for debugging.
-         */
-        struct lu_ref         cll_holders;
-       /**
-        * A reference for cl_lock::cll_descr::cld_obj. For debugging.
-        */
-       struct lu_ref_link    cll_obj_ref;
-#ifdef CONFIG_LOCKDEP
-       /* "dep_map" name is assumed by lockdep.h macros. */
-       struct lockdep_map    dep_map;
-#endif
+       struct list_head      cll_layers;
+       /** lock attribute, extent, cl_object, etc. */
+       struct cl_lock_descr  cll_descr;
 };
 
 /**
@@ -1606,170 +1314,32 @@ struct cl_lock_slice {
 };
 
 /**
- * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
- *
- * NOTE: lov_subresult() depends on ordering here.
- */
-enum cl_lock_transition {
-        /** operation cannot be completed immediately. Wait for state change. */
-        CLO_WAIT        = 1,
-        /** operation had to release lock mutex, restart. */
-        CLO_REPEAT      = 2,
-        /** lower layer re-enqueued. */
-        CLO_REENQUEUED  = 3,
-};
-
-/**
  *
  * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
  */
 struct cl_lock_operations {
-        /**
-         * \name statemachine
-         *
-         * State machine transitions. These 3 methods are called to transfer
-         * lock from one state to another, as described in the commentary
-         * above enum #cl_lock_state.
-         *
-         * \retval 0          this layer has nothing more to do to before
-         *                       transition to the target state happens;
-         *
-         * \retval CLO_REPEAT method had to release and re-acquire cl_lock
-         *                    mutex, repeat invocation of transition method
-         *                    across all layers;
-         *
-         * \retval CLO_WAIT   this layer cannot move to the target state
-         *                    immediately, as it has to wait for certain event
-         *                    (e.g., the communication with the server). It
-         *                    is guaranteed, that when the state transfer
-         *                    becomes possible, cl_lock::cll_wq wait-queue
-         *                    is signaled. Caller can wait for this event by
-         *                    calling cl_lock_state_wait();
-         *
-         * \retval -ve        failure, abort state transition, move the lock
-         *                    into cl_lock_state::CLS_FREEING state, and set
-         *                    cl_lock::cll_error.
-         *
-         * Once all layers voted to agree to transition (by returning 0), lock
-         * is moved into corresponding target state. All state transition
-         * methods are optional.
-         */
-        /** @{ */
-        /**
-         * Attempts to enqueue the lock. Called top-to-bottom.
-         *
-         * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
-         * \see osc_lock_enqueue()
-         */
-        int  (*clo_enqueue)(const struct lu_env *env,
-                            const struct cl_lock_slice *slice,
-                            struct cl_io *io, __u32 enqflags);
-        /**
-         * Attempts to wait for enqueue result. Called top-to-bottom.
-         *
-         * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
-         */
-        int  (*clo_wait)(const struct lu_env *env,
-                         const struct cl_lock_slice *slice);
-        /**
-         * Attempts to unlock the lock. Called bottom-to-top. In addition to
-         * usual return values of lock state-machine methods, this can return
-         * -ESTALE to indicate that lock cannot be returned to the cache, and
-         * has to be re-initialized.
-         * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
-         *
-         * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
-         */
-        int  (*clo_unuse)(const struct lu_env *env,
-                          const struct cl_lock_slice *slice);
-        /**
-         * Notifies layer that cached lock is started being used.
-         *
-         * \pre lock->cll_state == CLS_CACHED
-         *
-         * \see lov_lock_use(), osc_lock_use()
-         */
-        int  (*clo_use)(const struct lu_env *env,
-                        const struct cl_lock_slice *slice);
-        /** @} statemachine */
-        /**
-         * A method invoked when lock state is changed (as a result of state
-         * transition). This is used, for example, to track when the state of
-         * a sub-lock changes, to propagate this change to the corresponding
-         * top-lock. Optional
-         *
-         * \see lovsub_lock_state()
-         */
-        void (*clo_state)(const struct lu_env *env,
-                          const struct cl_lock_slice *slice,
-                          enum cl_lock_state st);
-        /**
-         * Returns true, iff given lock is suitable for the given io, idea
-         * being, that there are certain "unsafe" locks, e.g., ones acquired
-         * for O_APPEND writes, that we don't want to re-use for a normal
-         * write, to avoid the danger of cascading evictions. Optional. Runs
-         * under cl_object_header::coh_lock_guard.
-         *
-         * XXX this should take more information about lock needed by
-         * io. Probably lock description or something similar.
-         *
-         * \see lov_fits_into()
-         */
-        int (*clo_fits_into)(const struct lu_env *env,
-                             const struct cl_lock_slice *slice,
-                             const struct cl_lock_descr *need,
-                             const struct cl_io *io);
-        /**
-         * \name ast
-         * Asynchronous System Traps. All of then are optional, all are
-         * executed bottom-to-top.
-         */
-        /** @{ */
-
-        /**
-         * Cancellation callback. Cancel a lock voluntarily, or under
-         * the request of server.
-         */
-        void (*clo_cancel)(const struct lu_env *env,
-                           const struct cl_lock_slice *slice);
-        /**
-         * Lock weighting ast. Executed to estimate how precious this lock
-         * is. The sum of results across all layers is used to determine
-         * whether lock worth keeping in cache given present memory usage.
-         *
-         * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
-         */
-        unsigned long (*clo_weigh)(const struct lu_env *env,
-                                   const struct cl_lock_slice *slice);
-        /** @} ast */
-
-        /**
-         * \see lovsub_lock_closure()
-         */
-        int (*clo_closure)(const struct lu_env *env,
-                           const struct cl_lock_slice *slice,
-                           struct cl_lock_closure *closure);
-        /**
-         * Executed bottom-to-top when lock description changes (e.g., as a
-         * result of server granting more generous lock than was requested).
-         *
-         * \see lovsub_lock_modify()
-         */
-        int (*clo_modify)(const struct lu_env *env,
-                          const struct cl_lock_slice *slice,
-                          const struct cl_lock_descr *updated);
-        /**
-         * Notifies layers (bottom-to-top) that lock is going to be
-         * destroyed. Responsibility of layers is to prevent new references on
-         * this lock from being acquired once this method returns.
-         *
-         * This can be called multiple times due to the races.
-         *
-         * \see cl_lock_delete()
-         * \see osc_lock_delete(), lovsub_lock_delete()
-         */
-        void (*clo_delete)(const struct lu_env *env,
-                           const struct cl_lock_slice *slice);
+       /** @{ */
+       /**
+        * Attempts to enqueue the lock. Called top-to-bottom.
+        *
+        * \retval 0    this layer has enqueued the lock successfully
+        * \retval >0   this layer has enqueued the lock, but need to wait on
+        *              @anchor for resources
+        * \retval -ve  failure
+        *
+        * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+        * \see osc_lock_enqueue()
+        */
+       int  (*clo_enqueue)(const struct lu_env *env,
+                           const struct cl_lock_slice *slice,
+                           struct cl_io *io, struct cl_sync_io *anchor);
+       /**
+        * Cancel a lock, release its DLM lock ref, while does not cancel the
+        * DLM lock
+        */
+       void (*clo_cancel)(const struct lu_env *env,
+                          const struct cl_lock_slice *slice);
+       /** @} */
         /**
          * Destructor. Frees resources and the slice.
          *
@@ -2147,10 +1717,14 @@ enum cl_enq_flags {
          * for async glimpse lock.
          */
         CEF_AGL          = 0x00000020,
-        /**
-         * mask of enq_flags.
-         */
-        CEF_MASK         = 0x0000003f,
+       /**
+        * enqueue a lock to test DLM lock existence.
+        */
+       CEF_PEEK        = 0x00000040,
+       /**
+        * mask of enq_flags.
+        */
+       CEF_MASK         = 0x0000007f,
 };
 
 /**
@@ -2159,13 +1733,13 @@ enum cl_enq_flags {
  */
 struct cl_io_lock_link {
        /** linkage into one of cl_lockset lists. */
-       struct list_head        cill_linkage;
-       struct cl_lock_descr    cill_descr;
-       struct cl_lock          *cill_lock;
+       struct list_head        cill_linkage;
+       struct cl_lock          cill_lock;
        /** optional destructor */
-       void                    (*cill_fini)(const struct lu_env *env,
+       void                    (*cill_fini)(const struct lu_env *env,
                                             struct cl_io_lock_link *link);
 };
+#define cill_descr     cill_lock.cll_descr
 
 /**
  * Lock-set represents a collection of locks, that io needs at a
@@ -2199,8 +1773,6 @@ struct cl_io_lock_link {
 struct cl_lockset {
        /** locks to be acquired. */
        struct list_head  cls_todo;
-       /** locks currently being processed. */
-       struct list_head  cls_curr;
        /** locks acquired. */
        struct list_head  cls_done;
 };
@@ -2566,9 +2138,7 @@ struct cl_site {
         * and top-locks (and top-pages) are accounted here.
         */
        struct cache_stats      cs_pages;
-       struct cache_stats      cs_locks;
        atomic_t                cs_pages_state[CPS_NR];
-       atomic_t                cs_locks_state[CLS_NR];
 };
 
 int  cl_site_init(struct cl_site *s, struct cl_device *top);
@@ -2686,9 +2256,8 @@ int  cl_object_glimpse    (const struct lu_env *env, struct cl_object *obj,
                            struct ost_lvb *lvb);
 int  cl_conf_set          (const struct lu_env *env, struct cl_object *obj,
                            const struct cl_object_conf *conf);
-void cl_object_prune      (const struct lu_env *env, struct cl_object *obj);
+int  cl_object_prune      (const struct lu_env *env, struct cl_object *obj);
 void cl_object_kill       (const struct lu_env *env, struct cl_object *obj);
-int  cl_object_has_locks  (struct cl_object *obj);
 
 /**
  * Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2830,128 +2399,18 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
 
 /** \defgroup cl_lock cl_lock
  * @{ */
-
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-                             const struct cl_lock_descr *need,
-                             const char *scope, const void *source);
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-                             const struct cl_lock_descr *need,
-                             const char *scope, const void *source);
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-                                const struct cl_lock_descr *need,
-                                const char *scope, const void *source);
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-                                struct cl_object *obj, pgoff_t index,
-                                struct cl_lock *except, int pending,
-                                int canceld);
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+                   struct cl_lock *lock);
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+                const struct cl_io *io);
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
 const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
-                                       const struct lu_device_type *dtype);
-
-void  cl_lock_get       (struct cl_lock *lock);
-void  cl_lock_get_trust (struct cl_lock *lock);
-void  cl_lock_put       (const struct lu_env *env, struct cl_lock *lock);
-void  cl_lock_hold_add  (const struct lu_env *env, struct cl_lock *lock,
-                         const char *scope, const void *source);
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-                         const char *scope, const void *source);
-void  cl_lock_unhold    (const struct lu_env *env, struct cl_lock *lock,
-                         const char *scope, const void *source);
-void  cl_lock_release   (const struct lu_env *env, struct cl_lock *lock,
-                         const char *scope, const void *source);
-void  cl_lock_user_add  (const struct lu_env *env, struct cl_lock *lock);
-void  cl_lock_user_del  (const struct lu_env *env, struct cl_lock *lock);
-
-enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
-                                     struct cl_lock *lock);
-void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
-                       enum cl_lock_state state);
-int cl_lock_is_intransit(struct cl_lock *lock);
-
-int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
-                         int keep_mutex);
-
-/** \name statemachine statemachine
- * Interface to lock state machine consists of 3 parts:
- *
- *     - "try" functions that attempt to effect a state transition. If state
- *     transition is not possible right now (e.g., if it has to wait for some
- *     asynchronous event to occur), these functions return
- *     cl_lock_transition::CLO_WAIT.
- *
- *     - "non-try" functions that implement synchronous blocking interface on
- *     top of non-blocking "try" functions. These functions repeatedly call
- *     corresponding "try" versions, and if state transition is not possible
- *     immediately, wait for lock state change.
- *
- *     - methods from cl_lock_operations, called by "try" functions. Lock can
- *     be advanced to the target state only when all layers voted that they
- *     are ready for this transition. "Try" functions call methods under lock
- *     mutex. If a layer had to release a mutex, it re-acquires it and returns
- *     cl_lock_transition::CLO_REPEAT, causing "try" function to call all
- *     layers again.
- *
- * TRY              NON-TRY      METHOD                            FINAL STATE
- *
- * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
- *
- * cl_wait_try()    cl_wait()    cl_lock_operations::clo_wait()    CLS_HELD
- *
- * cl_unuse_try()   cl_unuse()   cl_lock_operations::clo_unuse()   CLS_CACHED
- *
- * cl_use_try()     NONE         cl_lock_operations::clo_use()     CLS_HELD
- *
- * @{ */
-
-int   cl_enqueue    (const struct lu_env *env, struct cl_lock *lock,
-                     struct cl_io *io, __u32 flags);
-int   cl_wait       (const struct lu_env *env, struct cl_lock *lock);
-void  cl_unuse      (const struct lu_env *env, struct cl_lock *lock);
-int   cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-                     struct cl_io *io, __u32 flags);
-int   cl_unuse_try  (const struct lu_env *env, struct cl_lock *lock);
-int   cl_wait_try   (const struct lu_env *env, struct cl_lock *lock);
-int   cl_use_try    (const struct lu_env *env, struct cl_lock *lock, int atomic);
-
-/** @} statemachine */
-
-void cl_lock_signal      (const struct lu_env *env, struct cl_lock *lock);
-int  cl_lock_state_wait  (const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_state_set   (const struct lu_env *env, struct cl_lock *lock,
-                          enum cl_lock_state state);
-int  cl_queue_match(const struct list_head *queue,
-                   const struct cl_lock_descr *need);
-
-void cl_lock_mutex_get  (const struct lu_env *env, struct cl_lock *lock);
-int  cl_lock_mutex_try  (const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_mutex_put  (const struct lu_env *env, struct cl_lock *lock);
-int  cl_lock_is_mutexed (struct cl_lock *lock);
-int  cl_lock_nr_mutexed (const struct lu_env *env);
-int  cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
-int  cl_lock_ext_match  (const struct cl_lock_descr *has,
-                         const struct cl_lock_descr *need);
-int  cl_lock_descr_match(const struct cl_lock_descr *has,
-                         const struct cl_lock_descr *need);
-int  cl_lock_mode_match (enum cl_lock_mode has, enum cl_lock_mode need);
-int  cl_lock_modify     (const struct lu_env *env, struct cl_lock *lock,
-                         const struct cl_lock_descr *desc);
-
-void cl_lock_closure_init (const struct lu_env *env,
-                           struct cl_lock_closure *closure,
-                           struct cl_lock *origin, int wait);
-void cl_lock_closure_fini (struct cl_lock_closure *closure);
-int  cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-                           struct cl_lock_closure *closure);
-void cl_lock_disclosure   (const struct lu_env *env,
-                           struct cl_lock_closure *closure);
-int  cl_lock_enclosure    (const struct lu_env *env, struct cl_lock *lock,
-                           struct cl_lock_closure *closure);
+                                      const struct lu_device_type *dtype);
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
 
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+                   struct cl_lock *lock, struct cl_sync_io *anchor);
 void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_error (const struct lu_env *env, struct cl_lock *lock, int error);
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
 
 /** @} cl_lock */
 
diff --git a/lustre/include/lclient.h b/lustre/include/lclient.h
index c6ea235..9c56e69 100644
@@ -121,10 +121,6 @@ struct ccc_io {
                } write;
        } u;
        /**
-        * True iff io is processing glimpse right now.
-        */
-       int                  cui_glimpse;
-       /**
         * Layout version when this IO is initialized
         */
        __u32                cui_layout_gen;
@@ -145,9 +141,10 @@ extern struct lu_context_key ccc_key;
 extern struct lu_context_key ccc_session_key;
 
 struct ccc_thread_info {
-        struct cl_lock_descr cti_descr;
-        struct cl_io         cti_io;
-        struct cl_attr       cti_attr;
+       struct cl_lock          cti_lock;
+       struct cl_lock_descr    cti_descr;
+       struct cl_io            cti_io;
+       struct cl_attr          cti_attr;
 };
 
 static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
@@ -159,6 +156,13 @@ static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
         return info;
 }
 
+static inline struct cl_lock *ccc_env_lock(const struct lu_env *env)
+{
+       struct cl_lock *lock = &ccc_env_info(env)->cti_lock;
+       memset(lock, 0, sizeof(*lock));
+       return lock;
+}
+
 static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
 {
         struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
@@ -349,18 +353,7 @@ void ccc_lock_delete(const struct lu_env *env,
                      const struct cl_lock_slice *slice);
 void ccc_lock_fini(const struct lu_env *env,struct cl_lock_slice *slice);
 int ccc_lock_enqueue(const struct lu_env *env,const struct cl_lock_slice *slice,
-                     struct cl_io *io, __u32 enqflags);
-int ccc_lock_use(const struct lu_env *env,const struct cl_lock_slice *slice);
-int ccc_lock_unuse(const struct lu_env *env,const struct cl_lock_slice *slice);
-int ccc_lock_wait(const struct lu_env *env,const struct cl_lock_slice *slice);
-int ccc_lock_fits_into(const struct lu_env *env,
-                       const struct cl_lock_slice *slice,
-                       const struct cl_lock_descr *need,
-                       const struct cl_io *io);
-void ccc_lock_state(const struct lu_env *env,
-                    const struct cl_lock_slice *slice,
-                    enum cl_lock_state state);
-
+                    struct cl_io *io, struct cl_sync_io *anchor);
 void ccc_io_fini(const struct lu_env *env, const struct cl_io_slice *ios);
 int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
                           __u32 enqflags, enum cl_lock_mode mode,
diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h
index dfc7933..9d695a7 100644
@@ -2971,6 +2971,8 @@ struct ldlm_extent {
         __u64 gid;
 };
 
+#define LDLM_GID_ANY  ((__u64) -1)
+
 static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
                                      const struct ldlm_extent *ex2)
 {
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 6bc0b41..1b988b2 100644
@@ -72,6 +72,7 @@ struct obd_device;
  */
 typedef enum {
         ELDLM_OK = 0,
+       ELDLM_LOCK_MATCHED = 1,
 
         ELDLM_LOCK_CHANGED = 300,
         ELDLM_LOCK_ABORTED = 301,
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 0eb34d7..0358433 100644
@@ -2514,6 +2514,7 @@ int ldlm_error2errno(ldlm_error_t error)
 
         switch (error) {
         case ELDLM_OK:
+       case ELDLM_LOCK_MATCHED:
                 result = 0;
                 break;
         case ELDLM_LOCK_CHANGED:
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index d23ba79..72141c8 100644
@@ -718,12 +718,12 @@ void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
  */
 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
 {
-        struct ldlm_lock *lock;
+       struct ldlm_lock *lock;
 
-        lock = ldlm_handle2lock(lockh);
-        LASSERT(lock != NULL);
-        ldlm_lock_addref_internal(lock, mode);
-        LDLM_LOCK_PUT(lock);
+       lock = ldlm_handle2lock(lockh);
+       LASSERTF(lock != NULL, "Non-existing lock: "LPX64"\n", lockh->cookie);
+       ldlm_lock_addref_internal(lock, mode);
+       LDLM_LOCK_PUT(lock);
 }
 EXPORT_SYMBOL(ldlm_lock_addref);
 
@@ -1181,10 +1181,11 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
                      lock->l_policy_data.l_extent.end < policy->l_extent.end))
                         continue;
 
-                if (unlikely(match == LCK_GROUP) &&
-                    lock->l_resource->lr_type == LDLM_EXTENT &&
-                    lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
-                        continue;
+               if (unlikely(match == LCK_GROUP) &&
+                   lock->l_resource->lr_type == LDLM_EXTENT &&
+                   policy->l_extent.gid != LDLM_GID_ANY &&
+                   lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
+                       continue;
 
                 /* We match if we have existing lock with same or wider set
                    of bits. */
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index defd2ac..25e1b4d 100644
@@ -555,7 +555,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         struct ldlm_lock *lock;
         struct ldlm_reply *reply;
         int cleanup_phase = 1;
-       int size = 0;
         ENTRY;
 
         lock = ldlm_handle2lock(lockh);
@@ -582,8 +581,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
        if (reply == NULL)
                GOTO(cleanup, rc = -EPROTO);
 
-       if (lvb_len != 0) {
-               LASSERT(lvb != NULL);
+       if (lvb_len > 0) {
+               int size = 0;
 
                size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
                                             RCL_SERVER);
@@ -596,13 +595,14 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                                   lvb_len, size);
                        GOTO(cleanup, rc = -EINVAL);
                }
+               lvb_len = size;
        }
 
        if (rc == ELDLM_LOCK_ABORTED) {
-               if (lvb_len != 0)
+               if (lvb_len > 0 && lvb != NULL)
                        rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
                        rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-                                          lvb, size);
-               GOTO(cleanup, rc = (rc != 0 ? rc : ELDLM_LOCK_ABORTED));
+                                          lvb, lvb_len);
+               GOTO(cleanup, rc = rc ? : ELDLM_LOCK_ABORTED);
        }
 
         /* lock enqueued on the server */
@@ -676,7 +676,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
         /* If the lock has already been granted by a completion AST, don't
          * clobber the LVB with an older one. */
-       if (lvb_len != 0) {
+       if (lvb_len > 0) {
                /* We must lock or a racing completion might update lvb without
                 * letting us know and we'll clobber the correct value.
                 * Cannot unlock after the check either, a that still leaves
@@ -684,7 +684,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                lock_res_and_lock(lock);
                if (lock->l_req_mode != lock->l_granted_mode)
                        rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-                                          lock->l_lvb_data, size);
+                                          lock->l_lvb_data, lvb_len);
                unlock_res_and_lock(lock);
                if (rc < 0) {
                        cleanup_phase = 1;
@@ -703,11 +703,11 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                 }
         }
 
-        if (lvb_len && lvb != NULL) {
-                /* Copy the LVB here, and not earlier, because the completion
-                 * AST (if any) can override what we got in the reply */
-                memcpy(lvb, lock->l_lvb_data, lvb_len);
-        }
+       if (lvb_len > 0 && lvb != NULL) {
+               /* Copy the LVB here, and not earlier, because the completion
+                * AST (if any) can override what we got in the reply */
+               memcpy(lvb, lock->l_lvb_data, lvb_len);
+       }
 
         LDLM_DEBUG(lock, "client-side enqueue END");
          EXIT;
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index 188609a..e72987d 100644
@@ -1398,3 +1398,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
                         LDLM_DEBUG_LIMIT(level, lock, "###");
         }
 }
+EXPORT_SYMBOL(ldlm_resource_dump);
diff --git a/lustre/llite/glimpse.c b/lustre/llite/glimpse.c
index 30d2304..38559b8 100644
@@ -85,11 +85,8 @@ blkcnt_t dirty_cnt(struct inode *inode)
 int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
                     struct inode *inode, struct cl_object *clob, int agl)
 {
-        struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
         struct cl_inode_info *lli   = cl_i2info(inode);
         const struct lu_fid  *fid   = lu_object_fid(&clob->co_lu);
-        struct ccc_io        *cio   = ccc_env_io(env);
-        struct cl_lock       *lock;
         int result;
 
         ENTRY;
@@ -97,6 +94,9 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
         if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
                 CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid));
                if (lli->lli_has_smd) {
+                       struct cl_lock *lock = ccc_env_lock(env);
+                       struct cl_lock_descr *descr = &lock->cll_descr;
+
                         /* NOTE: this looks like DLM lock request, but it may
                          *       not be one. Due to CEF_ASYNC flag (translated
                          *       to LDLM_FL_HAS_INTENT by osc), this is
@@ -112,11 +112,10 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
                          *       attributes are returned anyway. */
                         *descr = whole_file;
                         descr->cld_obj   = clob;
-                        descr->cld_mode  = CLM_PHANTOM;
-                        descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
-                        if (agl)
-                                descr->cld_enq_flags |= CEF_AGL;
-                        cio->cui_glimpse = 1;
+                       descr->cld_mode  = CLM_READ;
+                       descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
+                       if (agl)
+                               descr->cld_enq_flags |= CEF_AGL;
                        /*
                         * CEF_ASYNC is used because glimpse sub-locks cannot
                         * deadlock (because they never conflict with other
@@ -125,19 +124,11 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
                         * CEF_MUST protects glimpse lock from conversion into
                         * a lockless mode.
                         */
-                       lock = cl_lock_request(env, io, descr, "glimpse",
-                                              current);
-                       cio->cui_glimpse = 0;
-
-                       if (lock == NULL)
-                               RETURN(0);
+                       result = cl_lock_request(env, io, lock);
+                       if (result < 0)
+                               RETURN(result);
 
-                       if (IS_ERR(lock))
-                               RETURN(PTR_ERR(lock));
-
-                       LASSERT(agl == 0);
-                       result = cl_wait(env, lock);
-                       if (result == 0) {
+                       if (!agl) {
                                cl_merge_lvb(env, inode);
                                if (cl_isize_read(inode) > 0 &&
                                    inode->i_blocks == 0) {
@@ -149,9 +140,8 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
                                         */
                                        inode->i_blocks = dirty_cnt(inode);
                                }
-                               cl_unuse(env, lock);
                        }
-                       cl_lock_release(env, lock, "glimpse", current);
+                       cl_lock_release(env, lock);
                } else {
                        CDEBUG(D_DLMTRACE, "No objects for inode\n");
                        cl_merge_lvb(env, inode);
@@ -232,10 +222,7 @@ int cl_local_size(struct inode *inode)
 {
         struct lu_env           *env = NULL;
         struct cl_io            *io  = NULL;
-        struct ccc_thread_info  *cti;
         struct cl_object        *clob;
-        struct cl_lock_descr    *descr;
-        struct cl_lock          *lock;
         int                      result;
         int                      refcheck;
 
@@ -253,19 +240,16 @@ int cl_local_size(struct inode *inode)
         if (result > 0)
                 result = io->ci_result;
        else if (result == 0) {
-               cti = ccc_env_info(env);
-               descr = &cti->cti_descr;
+               struct cl_lock *lock = ccc_env_lock(env);
 
-               *descr = whole_file;
-               descr->cld_obj = clob;
-               lock = cl_lock_peek(env, io, descr, "localsize", current);
-               if (lock != NULL) {
+               lock->cll_descr = whole_file;
+               lock->cll_descr.cld_enq_flags = CEF_PEEK;
+               lock->cll_descr.cld_obj = clob;
+               result = cl_lock_request(env, io, lock);
+               if (result == 0) {
                        cl_merge_lvb(env, inode);
-                       cl_unuse(env, lock);
-                       cl_lock_release(env, lock, "localsize", current);
-                       result = 0;
-               } else
-                       result = -ENODATA;
+                       cl_lock_release(env, lock);
+               }
        }
        cl_io_fini(env, io);
        cl_env_put(env, &refcheck);
diff --git a/lustre/llite/lcommon_cl.c b/lustre/llite/lcommon_cl.c
index 84ff1ee..6865d27 100644
@@ -535,12 +535,6 @@ int ccc_transient_page_prep(const struct lu_env *env,
  *
  */
 
-void ccc_lock_delete(const struct lu_env *env,
-                     const struct cl_lock_slice *slice)
-{
-        CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-}
-
 void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
 {
         struct ccc_lock *clk = cl2ccc_lock(slice);
@@ -548,114 +542,13 @@ void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
 }
 
 int ccc_lock_enqueue(const struct lu_env *env,
-                     const struct cl_lock_slice *slice,
-                     struct cl_io *unused, __u32 enqflags)
-{
-        CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-        return 0;
-}
-
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice)
+                    const struct cl_lock_slice *slice,
+                    struct cl_io *unused, struct cl_sync_io *anchor)
 {
        CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
        return 0;
 }
 
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-        CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-        return 0;
-}
-
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-        CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-        return 0;
-}
-
-/**
- * Implementation of cl_lock_operations::clo_fits_into() methods for ccc
- * layer. This function is executed every time io finds an existing lock in
- * the lock cache while creating new lock. This function has to decide whether
- * cached lock "fits" into io.
- *
- * \param slice lock to be checked
- * \param io    IO that wants a lock.
- *
- * \see lov_lock_fits_into().
- */
-int ccc_lock_fits_into(const struct lu_env *env,
-                       const struct cl_lock_slice *slice,
-                       const struct cl_lock_descr *need,
-                       const struct cl_io *io)
-{
-        const struct cl_lock       *lock  = slice->cls_lock;
-        const struct cl_lock_descr *descr = &lock->cll_descr;
-        const struct ccc_io        *cio   = ccc_env_io(env);
-        int                         result;
-
-        ENTRY;
-        /*
-         * Work around DLM peculiarity: it assumes that glimpse
-         * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
-         * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
-         * sure that glimpse doesn't get CLM_WRITE top-lock, so that it
-         * doesn't enqueue CLM_WRITE sub-locks.
-         */
-        if (cio->cui_glimpse)
-                result = descr->cld_mode != CLM_WRITE;
-
-        /*
-         * Also, don't match incomplete write locks for read, otherwise read
-         * would enqueue missing sub-locks in the write mode.
-         */
-        else if (need->cld_mode != descr->cld_mode)
-                result = lock->cll_state >= CLS_ENQUEUED;
-        else
-                result = 1;
-        RETURN(result);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
- * whenever lock state changes. Transfers object attributes, that might be
- * updated as a result of lock acquiring into inode.
- */
-void ccc_lock_state(const struct lu_env *env,
-                    const struct cl_lock_slice *slice,
-                    enum cl_lock_state state)
-{
-       struct cl_lock *lock = slice->cls_lock;
-       ENTRY;
-
-       /*
-        * Refresh inode attributes when the lock is moving into CLS_HELD
-        * state, and only when this is a result of real enqueue, rather than
-        * of finding lock in the cache.
-        */
-       if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
-               struct cl_object *obj;
-               struct inode     *inode;
-
-               obj   = slice->cls_obj;
-               inode = ccc_object_inode(obj);
-
-               /* vmtruncate() sets the i_size
-                * under both a DLM lock and the
-                * ll_inode_size_lock().  If we don't get the
-                * ll_inode_size_lock() here we can match the DLM lock and
-                * reset i_size.  generic_file_write can then trust the
-                * stale i_size when doing appending writes and effectively
-                * cancel the result of the truncate.  Getting the
-                * ll_inode_size_lock() after the enqueue maintains the DLM
-                * -> ll_inode_size_lock() acquiring order. */
-               if (lock->cll_descr.cld_start == 0 &&
-                   lock->cll_descr.cld_end == CL_PAGE_EOF)
-                       cl_merge_lvb(env, inode);
-       }
-       EXIT;
-}
-
 /*****************************************************************************
  *
  * io operations.
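The hunk above changes the clo_enqueue() prototype: the __u32 enqflags argument is gone (enqueue flags now travel in cl_lock_descr::cld_enq_flags) and a struct cl_sync_io *anchor is passed instead. A minimal no-op method under the new prototype, shaped like ccc_lock_enqueue() above, would look roughly like the sketch below; the example_lock_enqueue name is hypothetical and only for illustration.

	static int example_lock_enqueue(const struct lu_env *env,
					const struct cl_lock_slice *slice,
					struct cl_io *unused,
					struct cl_sync_io *anchor)
	{
		/* nothing to enqueue at this layer; just check the object invariant */
		CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
		return 0;
	}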
index fa76dd0..4be56e6 100644 (file)
@@ -146,16 +146,17 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
        io->ci_ignore_layout = 1;
 
        rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-       if (rc) {
+       if (rc != 0) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                /* Does not make sense to take GL for released layout */
                if (rc > 0)
                        rc = -ENOTSUPP;
                return rc;
-        }
+       }
 
 
-        descr = &ccc_env_info(env)->cti_descr;
+       lock = ccc_env_lock(env);
+       descr = &lock->cll_descr;
         descr->cld_obj = obj;
         descr->cld_start = 0;
         descr->cld_end = CL_PAGE_EOF;
@@ -165,11 +166,11 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
        enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
        descr->cld_enq_flags = enqflags;
 
-       lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
-       if (IS_ERR(lock)) {
+       rc = cl_lock_request(env, io, lock);
+       if (rc < 0) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
-               return PTR_ERR(lock);
+               return rc;
        }
 
         cg->cg_env  = cl_env_get(&refcheck);
@@ -195,8 +196,7 @@ void cl_put_grouplock(struct ccc_grouplock *cg)
        cl_env_implant(env, &refcheck);
        cl_env_put(env, &refcheck);
 
-       cl_unuse(env, lock);
-       cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
+       cl_lock_release(env, lock);
        cl_io_fini(env, io);
        cl_env_put(env, NULL);
 }
index 73774ae..5749cf0 100644 (file)
@@ -170,8 +170,7 @@ static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
         * If this page holds the last refc of cl_object, the following
         * call path may cause reschedule:
         *   cl_page_put -> cl_page_free -> cl_object_put ->
-        *     lu_object_put -> lu_object_free -> lov_delete_raid0 ->
-        *     cl_locks_prune.
+        *     lu_object_put -> lu_object_free -> lov_delete_raid0.
         *
         * However, the kernel can't get rid of this inode until all pages have
         * been cleaned up. Now that we hold page lock here, it's pretty safe
@@ -542,6 +541,7 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
 
        /* To avoid deadlock, try to lock page first. */
        vmpage = grab_cache_page_nowait(mapping, index);
+
        if (unlikely(vmpage == NULL ||
                     PageDirty(vmpage) || PageWriteback(vmpage))) {
                struct ccc_io *cio = ccc_env_io(env);
        if (unlikely(vmpage == NULL ||
                     PageDirty(vmpage) || PageWriteback(vmpage))) {
                struct ccc_io *cio = ccc_env_io(env);
index b7fb4e5..31d775b 100644 (file)
@@ -233,7 +233,7 @@ static int vvp_mmap_locks(const struct lu_env *env,
         unsigned long           addr;
         unsigned long           seg;
         ssize_t                 count;
-        int                     result;
+       int                     result = 0;
         ENTRY;
 
         LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
@@ -264,13 +264,13 @@ static int vvp_mmap_locks(const struct lu_env *env,
                         struct inode *inode = vma->vm_file->f_dentry->d_inode;
                         int flags = CEF_MUST;
 
-                        if (ll_file_nolock(vma->vm_file)) {
-                                /*
-                                 * For no lock case, a lockless lock will be
-                                 * generated.
-                                 */
-                                flags = CEF_NEVER;
-                        }
+                       if (ll_file_nolock(vma->vm_file)) {
+                               /*
+                                * mmap is not allowed for the no-lock case
+                                */
+                               result = -EINVAL;
+                               break;
+                       }
 
                         /*
                          * XXX: Required lock mode can be weakened: CIT_WRITE
@@ -291,20 +291,20 @@ static int vvp_mmap_locks(const struct lu_env *env,
                                descr->cld_mode, descr->cld_start,
                                descr->cld_end);
 
-                       if (result < 0) {
-                               up_read(&mm->mmap_sem);
-                               RETURN(result);
-                       }
+                       if (result < 0)
+                               break;
 
 
-                        if (vma->vm_end - addr >= count)
-                                break;
+                       if (vma->vm_end - addr >= count)
+                               break;
 
 
-                        count -= vma->vm_end - addr;
-                        addr = vma->vm_end;
-                }
-                up_read(&mm->mmap_sem);
-        }
-        RETURN(0);
+                       count -= vma->vm_end - addr;
+                       addr = vma->vm_end;
+               }
+               up_read(&mm->mmap_sem);
+               if (result < 0)
+                       break;
+       }
+       RETURN(result);
 }
 
 static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
@@ -330,7 +330,7 @@ static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
 static int vvp_io_read_lock(const struct lu_env *env,
                             const struct cl_io_slice *ios)
 {
-       struct cl_io            *io = ios->cis_io;
+       struct cl_io            *io  = ios->cis_io;
        struct cl_io_rw_common  *rd = &io->u.ci_rd.rd;
        int result;
 
@@ -792,6 +792,7 @@ static int vvp_io_write_start(const struct lu_env *env,
                  * PARALLEL IO This has to be changed for parallel IO doing
                  * out-of-order writes.
                  */
+               ll_merge_lvb(env, inode);
                 pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
                 cio->cui_iocb->ki_pos = pos;
         } else {
index d198c50..d9611b9 100644 (file)
  *
  */
 
-/**
- * Estimates lock value for the purpose of managing the lock cache during
- * memory shortages.
- *
- * Locks for memory mapped files are almost infinitely precious, others are
- * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
- * ordered within themselves by weights assigned from other layers.
- */
-static unsigned long vvp_lock_weigh(const struct lu_env *env,
-                                   const struct cl_lock_slice *slice)
-{
-       struct ccc_object *cob = cl2ccc(slice->cls_obj);
-
-       ENTRY;
-       RETURN(atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0);
-}
-
 static const struct cl_lock_operations vvp_lock_ops = {
 static const struct cl_lock_operations vvp_lock_ops = {
-        .clo_delete    = ccc_lock_delete,
-        .clo_fini      = ccc_lock_fini,
-        .clo_enqueue   = ccc_lock_enqueue,
-        .clo_wait      = ccc_lock_wait,
-       .clo_use       = ccc_lock_use,
-        .clo_unuse     = ccc_lock_unuse,
-        .clo_fits_into = ccc_lock_fits_into,
-        .clo_state     = ccc_lock_state,
-        .clo_weigh     = vvp_lock_weigh
+       .clo_fini      = ccc_lock_fini,
+       .clo_enqueue   = ccc_lock_enqueue
 };
 
 int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
 };
 
 int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
index 434e141..4150f74 100644 (file)
@@ -150,11 +150,15 @@ static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
        int rc;
        ENTRY;
 
        int rc;
        ENTRY;
 
-       rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_ALL, 1);
-       if (rc == 0)
-               truncate_inode_pages(inode->i_mapping, 0);
+       rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+       if (rc < 0) {
+               CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
+                      PFID(lu_object_fid(&obj->co_lu)), rc);
+               RETURN(rc);
+       }
 
 
-       RETURN(rc);
+       truncate_inode_pages(inode->i_mapping, 0);
+       RETURN(0);
 }
 
 static const struct cl_object_operations vvp_ops = {
 }
 
 static const struct cl_object_operations vvp_ops = {
index e9d28f8..787e7eb 100644 (file)
@@ -280,87 +280,27 @@ struct lov_object {
 };
 
 /**
 };
 
 /**
- * Flags that top-lock can set on each of its sub-locks.
- */
-enum lov_sub_flags {
-        /** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
-        LSF_HELD = 1 << 0
-};
-
-/**
  * State lov_lock keeps for each sub-lock.
  */
 struct lov_lock_sub {
  * State lov_lock keeps for each sub-lock.
  */
 struct lov_lock_sub {
-        /** sub-lock itself */
-        struct lovsub_lock  *sub_lock;
-        /** An array of per-sub-lock flags, taken from enum lov_sub_flags */
-        unsigned             sub_flags;
-        int                  sub_stripe;
-        struct cl_lock_descr sub_descr;
-        struct cl_lock_descr sub_got;
+       /** sub-lock itself */
+       struct cl_lock          sub_lock;
+       /** Set if the sublock has ever been enqueued, meaning it may
+        * hold resources of underlying layers */
+       unsigned int            sub_is_enqueued:1,
+                               sub_initialized:1;
+       int                     sub_stripe;
 };
 
 /**
  * lov-specific lock state.
  */
 struct lov_lock {
-        struct cl_lock_slice   lls_cl;
-        /** Number of sub-locks in this lock */
-        int                    lls_nr;
-        /**
-         * Number of existing sub-locks.
-         */
-        unsigned               lls_nr_filled;
-        /**
-         * Set when sub-lock was canceled, while top-lock was being
-         * used, or unused.
-         */
-       unsigned int           lls_cancel_race:1,
-                               lls_ever_canceled:1;
-        /**
-         * An array of sub-locks
-         *
-         * There are two issues with managing sub-locks:
-         *
-         *     - sub-locks are concurrently canceled, and
-         *
-         *     - sub-locks are shared with other top-locks.
-         *
-         * To manage cancellation, top-lock acquires a hold on a sublock
-         * (lov_sublock_adopt()) when the latter is inserted into
-         * lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
-         * when top-lock is going into CLS_CACHED state or destroyed. Hold
-         * prevents sub-lock from cancellation.
-         *
-         * Sub-lock sharing means, among other things, that top-lock that is
-         * in the process of creation (i.e., not yet inserted into lock list)
-         * is already accessible to other threads once at least one of its
-         * sub-locks is created, see lov_lock_sub_init().
-         *
-         * Sub-lock can be in one of the following states:
-         *
-         *     - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
-         *       sub-lock was either never created (top-lock is in CLS_NEW
-         *       state), or it was created, then canceled, then destroyed
-         *       (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
-         *
-         *     - sub-lock exists and is on
-         *       hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
-         *       normal state of a sub-lock in CLS_HELD and CLS_CACHED states
-         *       of a top-lock.
-         *
-         *     - sub-lock exists, but is not held by the top-lock. This
-         *       happens after top-lock released a hold on sub-locks before
-         *       going into cache (lov_lock_unuse()).
-         *
-         * \todo To support wide-striping, array has to be replaced with a set
-         * of queues to avoid scanning.
-         */
-        struct lov_lock_sub   *lls_sub;
-        /**
-         * Original description with which lock was enqueued.
-         */
-        struct cl_lock_descr   lls_orig;
+       struct cl_lock_slice    lls_cl;
+       /** Number of sub-locks in this lock */
+       int                     lls_nr;
+       /** sublock array */
+       struct lov_lock_sub     lls_sub[0];
 };
 
 struct lov_page {
 };
 
 struct lov_page {
@@ -446,7 +386,6 @@ struct lov_thread_info {
        struct ost_lvb          lti_lvb;
        struct cl_2queue        lti_cl2q;
        struct cl_page_list     lti_plist;
        struct ost_lvb          lti_lvb;
        struct cl_2queue        lti_cl2q;
        struct cl_page_list     lti_plist;
-       struct cl_lock_closure  lti_closure;
        wait_queue_t            lti_waiter;
        struct cl_attr          lti_attr;
 };
        wait_queue_t            lti_waiter;
        struct cl_attr          lti_attr;
 };
index 9de5193..8ea9185 100644 (file)
@@ -144,19 +144,16 @@ static void *lov_key_init(const struct lu_context *ctx,
        struct lov_thread_info *info;
 
        OBD_SLAB_ALLOC_PTR_GFP(info, lov_thread_kmem, GFP_NOFS);
        struct lov_thread_info *info;
 
        OBD_SLAB_ALLOC_PTR_GFP(info, lov_thread_kmem, GFP_NOFS);
-       if (info != NULL)
-               INIT_LIST_HEAD(&info->lti_closure.clc_list);
-       else
+       if (info == NULL)
                info = ERR_PTR(-ENOMEM);
        return info;
 }
 
 static void lov_key_fini(const struct lu_context *ctx,
                info = ERR_PTR(-ENOMEM);
        return info;
 }
 
 static void lov_key_fini(const struct lu_context *ctx,
-                         struct lu_context_key *key, void *data)
+                        struct lu_context_key *key, void *data)
 {
 {
-        struct lov_thread_info *info = data;
-       LINVRNT(list_empty(&info->lti_closure.clc_list));
-        OBD_SLAB_FREE_PTR(info, lov_thread_kmem);
+       struct lov_thread_info *info = data;
+       OBD_SLAB_FREE_PTR(info, lov_thread_kmem);
 }
 
 struct lu_context_key lov_key = {
 }
 
 struct lu_context_key lov_key = {
index 1d1d980..3166dda 100644 (file)
  *  @{
  */
 
  *  @{
  */
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-                                               struct cl_lock *parent);
-
-static int lov_lock_unuse(const struct lu_env *env,
-                          const struct cl_lock_slice *slice);
 /*****************************************************************************
  *
  * Lov lock operations.
 /*****************************************************************************
  *
  * Lov lock operations.
@@ -58,7 +53,7 @@ static int lov_lock_unuse(const struct lu_env *env,
  */
 
 static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
  */
 
 static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
-                                                   struct cl_lock *parent,
+                                                  const struct cl_lock *parent,
                                                    struct lov_lock_sub *lls)
 {
         struct lov_sublock_env *subenv;
                                                    struct lov_lock_sub *lls)
 {
         struct lov_sublock_env *subenv;
@@ -100,188 +95,24 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv)
                 lov_sub_put(subenv->lse_sub);
 }
 
                 lov_sub_put(subenv->lse_sub);
 }
 
-static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
-                              struct cl_lock *sublock, int idx,
-                              struct lov_lock_link *link)
-{
-        struct lovsub_lock *lsl;
-        struct cl_lock     *parent = lck->lls_cl.cls_lock;
-        int                 rc;
-
-        LASSERT(cl_lock_is_mutexed(parent));
-        LASSERT(cl_lock_is_mutexed(sublock));
-        ENTRY;
-
-        lsl = cl2sub_lock(sublock);
-        /*
-         * check that sub-lock doesn't have lock link to this top-lock.
-         */
-        LASSERT(lov_lock_link_find(env, lck, lsl) == NULL);
-        LASSERT(idx < lck->lls_nr);
-
-        lck->lls_sub[idx].sub_lock = lsl;
-        lck->lls_nr_filled++;
-        LASSERT(lck->lls_nr_filled <= lck->lls_nr);
-       list_add_tail(&link->lll_list, &lsl->lss_parents);
-        link->lll_idx = idx;
-        link->lll_super = lck;
-        cl_lock_get(parent);
-        lu_ref_add(&parent->cll_reference, "lov-child", sublock);
-        lck->lls_sub[idx].sub_flags |= LSF_HELD;
-        cl_lock_user_add(env, sublock);
-
-        rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
-        LASSERT(rc == 0); /* there is no way this can fail, currently */
-        EXIT;
-}
-
-static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
-                                        const struct cl_io *io,
-                                        struct lov_lock *lck,
-                                        int idx, struct lov_lock_link **out)
+static int lov_sublock_init(const struct lu_env *env,
+                           const struct cl_lock *parent,
+                           struct lov_lock_sub *lls)
 {
-       struct cl_lock       *sublock;
-       struct cl_lock       *parent;
-       struct lov_lock_link *link;
-
-       LASSERT(idx < lck->lls_nr);
+       struct lov_sublock_env *subenv;
+       int result;
        ENTRY;
 
-       OBD_SLAB_ALLOC_PTR_GFP(link, lov_lock_link_kmem, GFP_NOFS);
-       if (link != NULL) {
-               struct lov_sublock_env *subenv;
-               struct lov_lock_sub  *lls;
-               struct cl_lock_descr *descr;
-
-               parent = lck->lls_cl.cls_lock;
-               lls    = &lck->lls_sub[idx];
-               descr  = &lls->sub_got;
-
-               subenv = lov_sublock_env_get(env, parent, lls);
-               if (!IS_ERR(subenv)) {
-                       /* CAVEAT: Don't try to add a field in lov_lock_sub
-                        * to remember the subio. This is because lock is able
-                        * to be cached, but this is not true for IO. This
-                        * further means a sublock might be referenced in
-                        * different io context. -jay */
-
-                       sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
-                                              descr, "lov-parent", parent);
-                       lov_sublock_env_put(subenv);
-               } else {
-                       /* error occurs. */
-                       sublock = (void *)subenv;
-               }
-
-               if (!IS_ERR(sublock))
-                       *out = link;
-               else
-                       OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
-       } else
-               sublock = ERR_PTR(-ENOMEM);
-       RETURN(sublock);
-}
-
-static void lov_sublock_unlock(const struct lu_env *env,
-                               struct lovsub_lock *lsl,
-                               struct cl_lock_closure *closure,
-                               struct lov_sublock_env *subenv)
-{
-        ENTRY;
-        lov_sublock_env_put(subenv);
-        lsl->lss_active = NULL;
-        cl_lock_disclosure(env, closure);
-        EXIT;
-}
-
-static int lov_sublock_lock(const struct lu_env *env,
-                            struct lov_lock *lck,
-                            struct lov_lock_sub *lls,
-                            struct cl_lock_closure *closure,
-                            struct lov_sublock_env **lsep)
-{
-        struct lovsub_lock *sublock;
-        struct cl_lock     *child;
-        int                 result = 0;
-        ENTRY;
-
-       LASSERT(list_empty(&closure->clc_list));
-
-        sublock = lls->sub_lock;
-        child = sublock->lss_cl.cls_lock;
-        result = cl_lock_closure_build(env, child, closure);
-        if (result == 0) {
-                struct cl_lock *parent = closure->clc_origin;
-
-                LASSERT(cl_lock_is_mutexed(child));
-                sublock->lss_active = parent;
-
-                if (unlikely((child->cll_state == CLS_FREEING) ||
-                             (child->cll_flags & CLF_CANCELLED))) {
-                        struct lov_lock_link *link;
-                        /*
-                         * we could race with lock deletion which temporarily
-                         * put the lock in freeing state, bug 19080.
-                         */
-                        LASSERT(!(lls->sub_flags & LSF_HELD));
-
-                        link = lov_lock_link_find(env, lck, sublock);
-                        LASSERT(link != NULL);
-                        lov_lock_unlink(env, link, sublock);
-                        lov_sublock_unlock(env, sublock, closure, NULL);
-                        lck->lls_cancel_race = 1;
-                        result = CLO_REPEAT;
-                } else if (lsep) {
-                        struct lov_sublock_env *subenv;
-                        subenv = lov_sublock_env_get(env, parent, lls);
-                        if (IS_ERR(subenv)) {
-                                lov_sublock_unlock(env, sublock,
-                                                   closure, NULL);
-                                result = PTR_ERR(subenv);
-                        } else {
-                                *lsep = subenv;
-                        }
-                }
-        }
-        RETURN(result);
-}
-
-/**
- * Updates the result of a top-lock operation from a result of sub-lock
- * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
- * over sub-locks and lov_subresult() is used to calculate return value of a
- * top-operation. To this end, possible return values of sub-operations are
- * ordered as
- *
- *     - 0                  success
- *     - CLO_WAIT           wait for event
- *     - CLO_REPEAT         repeat top-operation
- *     - -ne                fundamental error
- *
- * Top-level return code can only go down through this list. CLO_REPEAT
- * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
- * has to be rechecked by the upper layer.
- */
-static int lov_subresult(int result, int rc)
-{
-        int result_rank;
-        int rc_rank;
-
-        ENTRY;
-
-       LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
-                "result = %d\n", result);
-       LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
-                "rc = %d\n", rc);
-        CLASSERT(CLO_WAIT < CLO_REPEAT);
-
-        /* calculate ranks in the ordering above */
-        result_rank = result < 0 ? 1 + CLO_REPEAT : result;
-        rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
-
-        if (result_rank < rc_rank)
-                result = rc;
-        RETURN(result);
+       subenv = lov_sublock_env_get(env, parent, lls);
+       if (!IS_ERR(subenv)) {
+               result = cl_lock_init(subenv->lse_env, &lls->sub_lock,
+                                     subenv->lse_io);
+               lov_sublock_env_put(subenv);
+       } else {
+               /* error occurs. */
+               result = PTR_ERR(subenv);
+       }
+       RETURN(result);
 }
 
 /**
 }
 
 /**
@@ -292,8 +123,9 @@ static int lov_subresult(int result, int rc)
  * fact that top-lock (that is being created) can be accessed concurrently
  * through already created sub-locks (possibly shared with other top-locks).
  */
  * fact that top-lock (that is being created) can be accessed concurrently
  * through already created sub-locks (possibly shared with other top-locks).
  */
-static int lov_lock_sub_init(const struct lu_env *env,
-                             struct lov_lock *lck, const struct cl_io *io)
+static struct lov_lock *lov_lock_sub_init(const struct lu_env *env,
+                                         const struct cl_object *obj,
+                                         struct cl_lock *lock)
 {
         int result = 0;
         int i;
 {
         int result = 0;
         int i;
@@ -303,251 +135,94 @@ static int lov_lock_sub_init(const struct lu_env *env,
         obd_off file_start;
         obd_off file_end;
 
         obd_off file_start;
         obd_off file_end;
 
-        struct lov_object       *loo    = cl2lov(lck->lls_cl.cls_obj);
-        struct lov_layout_raid0 *r0     = lov_r0(loo);
-        struct cl_lock          *parent = lck->lls_cl.cls_lock;
+       struct lov_object       *loo    = cl2lov(obj);
+       struct lov_layout_raid0 *r0     = lov_r0(loo);
+       struct lov_lock         *lovlck;
 
 
-        ENTRY;
+       ENTRY;
 
 
-        lck->lls_orig = parent->cll_descr;
-        file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
-        file_end   = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
+       file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start);
+       file_end   = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1;
 
         for (i = 0, nr = 0; i < r0->lo_nr; i++) {
                 /*
                  * XXX for wide striping smarter algorithm is desirable,
                  * breaking out of the loop, early.
                  */
 
         for (i = 0, nr = 0; i < r0->lo_nr; i++) {
                 /*
                  * XXX for wide striping smarter algorithm is desirable,
                  * breaking out of the loop, early.
                  */
-               if (likely(r0->lo_sub[i] != NULL) &&
+               if (likely(r0->lo_sub[i] != NULL) && /* spare layout */
                    lov_stripe_intersects(loo->lo_lsm, i,
                                          file_start, file_end, &start, &end))
                    lov_stripe_intersects(loo->lo_lsm, i,
                                          file_start, file_end, &start, &end))
-                        nr++;
-        }
-        LASSERT(nr > 0);
-        OBD_ALLOC_LARGE(lck->lls_sub, nr * sizeof lck->lls_sub[0]);
-        if (lck->lls_sub == NULL)
-                RETURN(-ENOMEM);
+                       nr++;
+       }
+       LASSERT(nr > 0);
 
 
-        lck->lls_nr = nr;
-        /*
-         * First, fill in sub-lock descriptions in
-         * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
-         * (called below in this function, and by lov_lock_enqueue()) to
-         * create sub-locks. At this moment, no other thread can access
-         * top-lock.
-         */
-        for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
+       OBD_ALLOC_LARGE(lovlck, offsetof(struct lov_lock, lls_sub[nr]));
+       if (lovlck == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       lovlck->lls_nr = nr;
+       for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
                if (likely(r0->lo_sub[i] != NULL) &&
                    lov_stripe_intersects(loo->lo_lsm, i,
                                          file_start, file_end, &start, &end)) {
-                        struct cl_lock_descr *descr;
-
-                        descr = &lck->lls_sub[nr].sub_descr;
-
-                        LASSERT(descr->cld_obj == NULL);
-                        descr->cld_obj   = lovsub2cl(r0->lo_sub[i]);
-                        descr->cld_start = cl_index(descr->cld_obj, start);
-                        descr->cld_end   = cl_index(descr->cld_obj, end);
-                        descr->cld_mode  = parent->cll_descr.cld_mode;
-                        descr->cld_gid   = parent->cll_descr.cld_gid;
-                        descr->cld_enq_flags   = parent->cll_descr.cld_enq_flags;
-                        /* XXX has no effect */
-                        lck->lls_sub[nr].sub_got = *descr;
-                        lck->lls_sub[nr].sub_stripe = i;
-                        nr++;
-                }
-        }
-        LASSERT(nr == lck->lls_nr);
-
-        /*
-         * Some sub-locks can be missing at this point. This is not a problem,
-         * because enqueue will create them anyway. Main duty of this function
-         * is to fill in sub-lock descriptions in a race free manner.
-         */
-        RETURN(result);
-}
-
-static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
-                               int i, int deluser, int rc)
-{
-        struct cl_lock *parent = lck->lls_cl.cls_lock;
+                       struct lov_lock_sub *lls = &lovlck->lls_sub[nr];
+                       struct cl_lock_descr *descr;
 
 
-        LASSERT(cl_lock_is_mutexed(parent));
-        ENTRY;
+                       descr = &lls->sub_lock.cll_descr;
 
 
-        if (lck->lls_sub[i].sub_flags & LSF_HELD) {
-                struct cl_lock    *sublock;
-                int dying;
+                       LASSERT(descr->cld_obj == NULL);
+                       descr->cld_obj   = lovsub2cl(r0->lo_sub[i]);
+                       descr->cld_start = cl_index(descr->cld_obj, start);
+                       descr->cld_end   = cl_index(descr->cld_obj, end);
+                       descr->cld_mode  = lock->cll_descr.cld_mode;
+                       descr->cld_gid   = lock->cll_descr.cld_gid;
+                       descr->cld_enq_flags = lock->cll_descr.cld_enq_flags;
 
 
-                LASSERT(lck->lls_sub[i].sub_lock != NULL);
-                sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-                LASSERT(cl_lock_is_mutexed(sublock));
+                       lls->sub_stripe = i;
 
 
-                lck->lls_sub[i].sub_flags &= ~LSF_HELD;
-                if (deluser)
-                        cl_lock_user_del(env, sublock);
-                /*
-                 * If the last hold is released, and cancellation is pending
-                 * for a sub-lock, release parent mutex, to avoid keeping it
-                 * while sub-lock is being paged out.
-                 */
-                dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
-                         sublock->cll_descr.cld_mode == CLM_GROUP ||
-                         (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
-                        sublock->cll_holds == 1;
-                if (dying)
-                        cl_lock_mutex_put(env, parent);
-                cl_lock_unhold(env, sublock, "lov-parent", parent);
-                if (dying) {
-                        cl_lock_mutex_get(env, parent);
-                        rc = lov_subresult(rc, CLO_REPEAT);
-                }
-                /*
-                 * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
-                 * not backed by a reference on a
-                 * sub-lock. lovsub_lock_delete() will clear
-                 * lck->lls_sub[i].sub_lock under semaphores, just before
-                 * sub-lock is destroyed.
-                 */
-        }
-        RETURN(rc);
-}
-
-static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
-                             int i)
-{
-        struct cl_lock *parent = lck->lls_cl.cls_lock;
+                       /* initialize sub lock */
+                       result = lov_sublock_init(env, lock, lls);
+                       if (result < 0)
+                               break;
 
 
-        LASSERT(cl_lock_is_mutexed(parent));
-        ENTRY;
+                       lls->sub_initialized = 1;
+                       nr++;
+               }
+       }
+       LASSERT(ergo(result == 0, nr == lovlck->lls_nr));
 
 
-        if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
-                struct cl_lock *sublock;
+       if (result != 0) {
+               for (i = 0; i < nr; ++i) {
+                       if (!lovlck->lls_sub[i].sub_initialized)
+                               break;
 
 
-                LASSERT(lck->lls_sub[i].sub_lock != NULL);
-                sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-                LASSERT(cl_lock_is_mutexed(sublock));
-                LASSERT(sublock->cll_state != CLS_FREEING);
+                       cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
+               }
 
 
-                lck->lls_sub[i].sub_flags |= LSF_HELD;
+               OBD_FREE_LARGE(lovlck,
+                               offsetof(struct lov_lock, lls_sub[nr]));
+               lovlck = ERR_PTR(result);
+       }
 
 
-                cl_lock_get_trust(sublock);
-                cl_lock_hold_add(env, sublock, "lov-parent", parent);
-                cl_lock_user_add(env, sublock);
-                cl_lock_put(env, sublock);
-        }
-        EXIT;
+       RETURN(lovlck);
 }
 
 static void lov_lock_fini(const struct lu_env *env,
                           struct cl_lock_slice *slice)
 {
-        struct lov_lock *lck;
-        int i;
-
-        ENTRY;
-        lck = cl2lov_lock(slice);
-        LASSERT(lck->lls_nr_filled == 0);
-        if (lck->lls_sub != NULL) {
-                for (i = 0; i < lck->lls_nr; ++i)
-                        /*
-                         * No sub-locks exists at this point, as sub-lock has
-                         * a reference on its parent.
-                         */
-                        LASSERT(lck->lls_sub[i].sub_lock == NULL);
-                OBD_FREE_LARGE(lck->lls_sub,
-                               lck->lls_nr * sizeof lck->lls_sub[0]);
-        }
-        OBD_SLAB_FREE_PTR(lck, lov_lock_kmem);
-        EXIT;
-}
-
-static int lov_lock_enqueue_wait(const struct lu_env *env,
-                                 struct lov_lock *lck,
-                                 struct cl_lock *sublock)
-{
-        struct cl_lock *lock = lck->lls_cl.cls_lock;
-        int             result;
-        ENTRY;
-
-        LASSERT(cl_lock_is_mutexed(lock));
-
-        cl_lock_mutex_put(env, lock);
-        result = cl_lock_enqueue_wait(env, sublock, 0);
-        cl_lock_mutex_get(env, lock);
-        RETURN(result ?: CLO_REPEAT);
-}
-
-/**
- * Tries to advance a state machine of a given sub-lock toward enqueuing of
- * the top-lock.
- *
- * \retval 0 if state-transition can proceed
- * \retval -ve otherwise.
- */
-static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
-                                struct cl_lock *sublock,
-                                struct cl_io *io, __u32 enqflags, int last)
-{
-        int result;
-        ENTRY;
+       struct lov_lock *lovlck;
+       int i;
 
 
-        /* first, try to enqueue a sub-lock ... */
-        result = cl_enqueue_try(env, sublock, io, enqflags);
-       if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
-               /* if it is enqueued, try to `wait' on it---maybe it's already
-                * granted */
-               result = cl_wait_try(env, sublock);
-               if (result == CLO_REENQUEUED)
-                       result = CLO_WAIT;
+       ENTRY;
+       lovlck = cl2lov_lock(slice);
+       for (i = 0; i < lovlck->lls_nr; ++i) {
+               LASSERT(!lovlck->lls_sub[i].sub_is_enqueued);
+               if (lovlck->lls_sub[i].sub_initialized)
+                       cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
        }
-        /*
-         * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
-         * parallel, otherwise---enqueue has to wait until sub-lock is granted
-         * before proceeding to the next one.
-         */
-        if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
-            (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
-                result = 0;
-        RETURN(result);
-}
-
-/**
- * Helper function for lov_lock_enqueue() that creates missing sub-lock.
- */
-static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
-                            struct cl_io *io, struct lov_lock *lck, int idx)
-{
-       struct lov_lock_link *link = NULL;
-        struct cl_lock       *sublock;
-        int                   result;
-
-        LASSERT(parent->cll_depth == 1);
-        cl_lock_mutex_put(env, parent);
-        sublock = lov_sublock_alloc(env, io, lck, idx, &link);
-        if (!IS_ERR(sublock))
-                cl_lock_mutex_get(env, sublock);
-        cl_lock_mutex_get(env, parent);
-
-        if (!IS_ERR(sublock)) {
-                cl_lock_get_trust(sublock);
-                if (parent->cll_state == CLS_QUEUING &&
-                    lck->lls_sub[idx].sub_lock == NULL) {
-                        lov_sublock_adopt(env, lck, sublock, idx, link);
-                } else {
-                        OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
-                        /* other thread allocated sub-lock, or enqueue is no
-                         * longer going on */
-                        cl_lock_mutex_put(env, parent);
-                        cl_lock_unhold(env, sublock, "lov-parent", parent);
-                        cl_lock_mutex_get(env, parent);
-                }
-                cl_lock_mutex_put(env, sublock);
-                cl_lock_put(env, sublock);
-                result = CLO_REPEAT;
-        } else
-                result = PTR_ERR(sublock);
-        return result;
+       OBD_FREE_LARGE(lovlck,
+                      offsetof(struct lov_lock, lls_sub[lovlck->lls_nr]));
+       EXIT;
 }
 
 /**
@@ -558,595 +233,65 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
  * and concurrent sub-lock cancellations.
  */
 static int lov_lock_enqueue(const struct lu_env *env,
-                            const struct cl_lock_slice *slice,
-                            struct cl_io *io, __u32 enqflags)
+                           const struct cl_lock_slice *slice,
+                           struct cl_io *io, struct cl_sync_io *anchor)
 {
-        struct cl_lock         *lock    = slice->cls_lock;
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, lock);
-        int i;
-        int result;
-        enum cl_lock_state minstate;
-
-        ENTRY;
+       struct cl_lock          *lock   = slice->cls_lock;
+       struct lov_lock         *lovlck = cl2lov_lock(slice);
+       int                     i;
+       int                     rc      = 0;
 
 
-        for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
-                int rc;
-                struct lovsub_lock     *sub;
-                struct lov_lock_sub    *lls;
-                struct cl_lock         *sublock;
-                struct lov_sublock_env *subenv;
-
-                if (lock->cll_state != CLS_QUEUING) {
-                        /*
-                         * Lock might have left QUEUING state if previous
-                         * iteration released its mutex. Stop enqueing in this
-                         * case and let the upper layer to decide what to do.
-                         */
-                        LASSERT(i > 0 && result != 0);
-                        break;
-                }
-
-                lls = &lck->lls_sub[i];
-                sub = lls->sub_lock;
-                /*
-                 * Sub-lock might have been canceled, while top-lock was
-                 * cached.
-                 */
-                if (sub == NULL) {
-                        result = lov_sublock_fill(env, lock, io, lck, i);
-                        /* lov_sublock_fill() released @lock mutex,
-                         * restart. */
-                        break;
-                }
-                sublock = sub->lss_cl.cls_lock;
-                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-                if (rc == 0) {
-                        lov_sublock_hold(env, lck, i);
-                        rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
-                                                  subenv->lse_io, enqflags,
-                                                  i == lck->lls_nr - 1);
-                        minstate = min(minstate, sublock->cll_state);
-                        if (rc == CLO_WAIT) {
-                                switch (sublock->cll_state) {
-                                case CLS_QUEUING:
-                                        /* take recursive mutex, the lock is
-                                         * released in lov_lock_enqueue_wait.
-                                         */
-                                        cl_lock_mutex_get(env, sublock);
-                                        lov_sublock_unlock(env, sub, closure,
-                                                           subenv);
-                                        rc = lov_lock_enqueue_wait(env, lck,
-                                                                   sublock);
-                                        break;
-                                case CLS_CACHED:
-                                       cl_lock_get(sublock);
-                                        /* take recursive mutex of sublock */
-                                        cl_lock_mutex_get(env, sublock);
-                                       /* need to release all locks in closure
-                                        * otherwise it may deadlock. LU-2683.*/
-                                        lov_sublock_unlock(env, sub, closure,
-                                                           subenv);
-                                       /* sublock and parent are held. */
-                                        rc = lov_sublock_release(env, lck, i,
-                                                                 1, rc);
-                                       cl_lock_mutex_put(env, sublock);
-                                       cl_lock_put(env, sublock);
-                                       break;
-                                default:
-                                        lov_sublock_unlock(env, sub, closure,
-                                                           subenv);
-                                        break;
-                                }
-                        } else {
-                                LASSERT(sublock->cll_conflict == NULL);
-                                lov_sublock_unlock(env, sub, closure, subenv);
-                        }
-                }
-                result = lov_subresult(result, rc);
-                if (result != 0)
-                        break;
-        }
-        cl_lock_closure_fini(closure);
-        RETURN(result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT);
-}
-
-static int lov_lock_unuse(const struct lu_env *env,
-                          const struct cl_lock_slice *slice)
-{
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-        int i;
-        int result;
-
-        ENTRY;
-
-        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-                int rc;
-                struct lovsub_lock     *sub;
-                struct cl_lock         *sublock;
-                struct lov_lock_sub    *lls;
-                struct lov_sublock_env *subenv;
-
-                /* top-lock state cannot change concurrently, because single
-                 * thread (one that released the last hold) carries unlocking
-                 * to the completion. */
-                LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-                lls = &lck->lls_sub[i];
-                sub = lls->sub_lock;
-                if (sub == NULL)
-                        continue;
+       ENTRY;
 
 
-               sublock = sub->lss_cl.cls_lock;
-               rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-               if (rc == 0) {
-                       if (!(lls->sub_flags & LSF_HELD)) {
-                               lov_sublock_unlock(env, sub, closure, subenv);
-                               continue;
-                       }
+       for (i = 0; i < lovlck->lls_nr; ++i) {
+               struct lov_lock_sub     *lls = &lovlck->lls_sub[i];
+               struct lov_sublock_env  *subenv;
 
 
-                       switch(sublock->cll_state) {
-                       case CLS_HELD:
-                               rc = cl_unuse_try(subenv->lse_env, sublock);
-                               lov_sublock_release(env, lck, i, 0, 0);
-                               break;
-                       default:
-                               cl_lock_cancel(subenv->lse_env, sublock);
-                               lov_sublock_release(env, lck, i, 1, 0);
-                               break;
-                       }
-                       lov_sublock_unlock(env, sub, closure, subenv);
+               subenv = lov_sublock_env_get(env, lock, lls);
+               if (IS_ERR(subenv)) {
+                       rc = PTR_ERR(subenv);
+                       break;
                }
                }
-               result = lov_subresult(result, rc);
-        }
-
-        if (result == 0 && lck->lls_cancel_race) {
-                lck->lls_cancel_race = 0;
-                result = -ESTALE;
-        }
-        cl_lock_closure_fini(closure);
-        RETURN(result);
-}
-
-
-static void lov_lock_cancel(const struct lu_env *env,
-                           const struct cl_lock_slice *slice)
-{
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-        int i;
-        int result;
-
-        ENTRY;
-
-        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-                int rc;
-                struct lovsub_lock     *sub;
-                struct cl_lock         *sublock;
-                struct lov_lock_sub    *lls;
-                struct lov_sublock_env *subenv;
-
-                /* top-lock state cannot change concurrently, because single
-                 * thread (one that released the last hold) carries unlocking
-                 * to the completion. */
-                lls = &lck->lls_sub[i];
-                sub = lls->sub_lock;
-                if (sub == NULL)
-                        continue;
-
-                sublock = sub->lss_cl.cls_lock;
-                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-                if (rc == 0) {
-                        if (!(lls->sub_flags & LSF_HELD)) {
-                                lov_sublock_unlock(env, sub, closure, subenv);
-                                continue;
-                        }
-
-                        switch(sublock->cll_state) {
-                        case CLS_HELD:
-                               rc = cl_unuse_try(subenv->lse_env, sublock);
-                                lov_sublock_release(env, lck, i, 0, 0);
-                                break;
-                        default:
-                               cl_lock_cancel(subenv->lse_env, sublock);
-                                lov_sublock_release(env, lck, i, 1, 0);
-                                break;
-                        }
-                        lov_sublock_unlock(env, sub, closure, subenv);
-                }
 
 
-                if (rc == CLO_REPEAT) {
-                        --i;
-                        continue;
-                }
-
-                result = lov_subresult(result, rc);
-        }
-
-        if (result)
-                CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
-                              "lov_lock_cancel fails with %d.\n", result);
-
-        cl_lock_closure_fini(closure);
-}
-
-static int lov_lock_wait(const struct lu_env *env,
-                         const struct cl_lock_slice *slice)
-{
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-        enum cl_lock_state      minstate;
-        int                     reenqueued;
-        int                     result;
-        int                     i;
-
-        ENTRY;
-
-again:
-        for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
-             i < lck->lls_nr; ++i) {
-                int rc;
-                struct lovsub_lock     *sub;
-                struct cl_lock         *sublock;
-                struct lov_lock_sub    *lls;
-                struct lov_sublock_env *subenv;
-
-                lls = &lck->lls_sub[i];
-                sub = lls->sub_lock;
-                LASSERT(sub != NULL);
-                sublock = sub->lss_cl.cls_lock;
-                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-                if (rc == 0) {
-                        LASSERT(sublock->cll_state >= CLS_ENQUEUED);
-                        if (sublock->cll_state < CLS_HELD)
-                                rc = cl_wait_try(env, sublock);
-
-                        minstate = min(minstate, sublock->cll_state);
-                        lov_sublock_unlock(env, sub, closure, subenv);
-                }
-                if (rc == CLO_REENQUEUED) {
-                        reenqueued++;
-                        rc = 0;
-                }
-                result = lov_subresult(result, rc);
-                if (result != 0)
-                        break;
-        }
-        /* Each sublock only can be reenqueued once, so will not loop for
-         * ever. */
-        if (result == 0 && reenqueued != 0)
-                goto again;
-        cl_lock_closure_fini(closure);
-        RETURN(result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT);
-}
+               rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io,
+                                    &lls->sub_lock, anchor);
+               lov_sublock_env_put(subenv);
+               if (rc != 0)
+                       break;
 
-static int lov_lock_use(const struct lu_env *env,
-                        const struct cl_lock_slice *slice)
-{
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-        int                     result;
-        int                     i;
-
-        LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-        ENTRY;
-
-        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-                int rc;
-                struct lovsub_lock     *sub;
-                struct cl_lock         *sublock;
-                struct lov_lock_sub    *lls;
-                struct lov_sublock_env *subenv;
-
-                LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
-                lls = &lck->lls_sub[i];
-                sub = lls->sub_lock;
-                if (sub == NULL) {
-                        /*
-                         * Sub-lock might have been canceled, while top-lock was
-                         * cached.
-                         */
-                        result = -ESTALE;
-                        break;
-                }
-
-                sublock = sub->lss_cl.cls_lock;
-                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-                if (rc == 0) {
-                        LASSERT(sublock->cll_state != CLS_FREEING);
-                        lov_sublock_hold(env, lck, i);
-                        if (sublock->cll_state == CLS_CACHED) {
-                                rc = cl_use_try(subenv->lse_env, sublock, 0);
-                                if (rc != 0)
-                                        rc = lov_sublock_release(env, lck,
-                                                                 i, 1, rc);
-                        } else if (sublock->cll_state == CLS_NEW) {
-                                /* Sub-lock might have been canceled, while
-                                 * top-lock was cached. */
-                                result = -ESTALE;
-                                lov_sublock_release(env, lck, i, 1, result);
-                        }
-                        lov_sublock_unlock(env, sub, closure, subenv);
-                }
-                result = lov_subresult(result, rc);
-                if (result != 0)
-                        break;
-        }
-
-        if (lck->lls_cancel_race) {
-                /*
-                 * If there is unlocking happened at the same time, then
-                 * sublock_lock state should be FREEING, and lov_sublock_lock
-                 * should return CLO_REPEAT. In this case, it should return
-                 * ESTALE, and up layer should reset the lock state to be NEW.
-                 */
-                lck->lls_cancel_race = 0;
-                LASSERT(result != 0);
-                result = -ESTALE;
-        }
-        cl_lock_closure_fini(closure);
-        RETURN(result);
-}
-
-#if 0
-static int lock_lock_multi_match()
-{
-        struct cl_lock          *lock    = slice->cls_lock;
-        struct cl_lock_descr    *subneed = &lov_env_info(env)->lti_ldescr;
-        struct lov_object       *loo     = cl2lov(lov->lls_cl.cls_obj);
-        struct lov_layout_raid0 *r0      = lov_r0(loo);
-        struct lov_lock_sub     *sub;
-        struct cl_object        *subobj;
-        obd_off  fstart;
-        obd_off  fend;
-        obd_off  start;
-        obd_off  end;
-        int i;
-
-        fstart = cl_offset(need->cld_obj, need->cld_start);
-        fend   = cl_offset(need->cld_obj, need->cld_end + 1) - 1;
-        subneed->cld_mode = need->cld_mode;
-        cl_lock_mutex_get(env, lock);
-        for (i = 0; i < lov->lls_nr; ++i) {
-                sub = &lov->lls_sub[i];
-                if (sub->sub_lock == NULL)
-                        continue;
-                subobj = sub->sub_descr.cld_obj;
-               if (!lov_stripe_intersects(loo->lo_lsm, sub->sub_stripe,
-                                           fstart, fend, &start, &end))
-                        continue;
-                subneed->cld_start = cl_index(subobj, start);
-                subneed->cld_end   = cl_index(subobj, end);
-                subneed->cld_obj   = subobj;
-                if (!cl_lock_ext_match(&sub->sub_got, subneed)) {
-                        result = 0;
-                        break;
-                }
-        }
-        cl_lock_mutex_put(env, lock);
+               lls->sub_is_enqueued = 1;
+       }
+       RETURN(rc);
 }
-#endif
 
-/**
- * Check if the extent region \a descr is covered by \a child against the
- * specific \a stripe.
- */
-static int lov_lock_stripe_is_matching(const struct lu_env *env,
-                                       struct lov_object *lov, int stripe,
-                                       const struct cl_lock_descr *child,
-                                       const struct cl_lock_descr *descr)
+static void lov_lock_cancel(const struct lu_env *env,
+                           const struct cl_lock_slice *slice)
 {
-       struct lov_stripe_md *lsm = lov->lo_lsm;
-        obd_off start;
-        obd_off end;
-        int result;
+       struct cl_lock  *lock   = slice->cls_lock;
+       struct lov_lock *lovlck = cl2lov_lock(slice);
+       int i;
 
-        if (lov_r0(lov)->lo_nr == 1)
-                return cl_lock_ext_match(child, descr);
+       ENTRY;
 
-        /*
-         * For a multi-stripes object:
-         * - make sure the descr only covers child's stripe, and
-         * - check if extent is matching.
-         */
-        start = cl_offset(&lov->lo_cl, descr->cld_start);
-        end   = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
+       for (i = 0; i < lovlck->lls_nr; ++i) {
+               struct lov_lock_sub     *lls = &lovlck->lls_sub[i];
+               struct cl_lock          *sublock = &lls->sub_lock;
+               struct lov_sublock_env  *subenv;
 
-       result = 0;
-       /* glimpse should work on the object with LOV EA hole. */
-       if ((end - start <= lsm->lsm_stripe_size) ||
-           (descr->cld_end == CL_PAGE_EOF &&
-            unlikely(lov->lo_lsm->lsm_pattern & LOV_PATTERN_F_HOLE))) {
-               int idx;
+               if (!lls->sub_is_enqueued)
+                       continue;
 
-               idx = lov_stripe_number(lsm, start);
-               if (idx == stripe ||
-                   unlikely(lov_r0(lov)->lo_sub[idx] == NULL)) {
-                       idx = lov_stripe_number(lsm, end);
-                       if (idx == stripe ||
-                           unlikely(lov_r0(lov)->lo_sub[idx] == NULL))
-                               result = 1;
+               lls->sub_is_enqueued = 0;
+               subenv = lov_sublock_env_get(env, lock, lls);
+               if (!IS_ERR(subenv)) {
+                       cl_lock_cancel(subenv->lse_env, sublock);
+                       lov_sublock_env_put(subenv);
+               } else {
+                       CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
+                                     "lov_lock_cancel fails with %ld.\n",
+                                     PTR_ERR(subenv));
                }
        }
-
-       if (result != 0) {
-                struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
-                obd_off sub_start;
-                obd_off sub_end;
-
-                subd->cld_obj  = NULL;   /* don't need sub object at all */
-                subd->cld_mode = descr->cld_mode;
-                subd->cld_gid  = descr->cld_gid;
-                result = lov_stripe_intersects(lsm, stripe, start, end,
-                                               &sub_start, &sub_end);
-                LASSERT(result);
-                subd->cld_start = cl_index(child->cld_obj, sub_start);
-                subd->cld_end   = cl_index(child->cld_obj, sub_end);
-                result = cl_lock_ext_match(child, subd);
-        }
-        return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_fits_into() method.
- *
- * Checks whether a lock (given by \a slice) is suitable for \a
- * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
- * O_APPEND write.
- *
- * \see ccc_lock_fits_into().
- */
-static int lov_lock_fits_into(const struct lu_env *env,
-                              const struct cl_lock_slice *slice,
-                              const struct cl_lock_descr *need,
-                              const struct cl_io *io)
-{
-        struct lov_lock   *lov = cl2lov_lock(slice);
-        struct lov_object *obj = cl2lov(slice->cls_obj);
-        int result;
-
-        LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
-        LASSERT(lov->lls_nr > 0);
-
-        ENTRY;
-
-       /* for top lock, it's necessary to match enq flags otherwise it will
-        * run into problem if a sublock is missing and reenqueue. */
-       if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
-               return 0;
-
-       if (lov->lls_ever_canceled)
-               return 0;
-
-        if (need->cld_mode == CLM_GROUP)
-                /*
-                 * always allow to match group lock.
-                 */
-                result = cl_lock_ext_match(&lov->lls_orig, need);
-        else if (lov->lls_nr == 1) {
-                struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
-                result = lov_lock_stripe_is_matching(env,
-                                                     cl2lov(slice->cls_obj),
-                                                     lov->lls_sub[0].sub_stripe,
-                                                     got, need);
-        } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
-                   !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
-                /*
-                 * Multi-stripe locks are only suitable for `quick' IO and for
-                 * glimpse.
-                 */
-                result = 0;
-        else
-                /*
-                 * Most general case: multi-stripe existing lock, and
-                 * (potentially) multi-stripe @need lock. Check that @need is
-                 * covered by @lov's sub-locks.
-                 *
-                 * For now, ignore lock expansions made by the server, and
-                 * match against original lock extent.
-                 */
-                result = cl_lock_ext_match(&lov->lls_orig, need);
-        CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
-               PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
-               lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
-               result);
-        RETURN(result);
-}
-
-void lov_lock_unlink(const struct lu_env *env,
-                     struct lov_lock_link *link, struct lovsub_lock *sub)
-{
-        struct lov_lock *lck    = link->lll_super;
-        struct cl_lock  *parent = lck->lls_cl.cls_lock;
-
-        LASSERT(cl_lock_is_mutexed(parent));
-        LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-        ENTRY;
-
-       list_del_init(&link->lll_list);
-        LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
-        /* yank this sub-lock from parent's array */
-        lck->lls_sub[link->lll_idx].sub_lock = NULL;
-        LASSERT(lck->lls_nr_filled > 0);
-        lck->lls_nr_filled--;
-        lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
-        cl_lock_put(env, parent);
-        OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
-        EXIT;
-}
-
-struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
-                                         struct lov_lock *lck,
-                                         struct lovsub_lock *sub)
-{
-        struct lov_lock_link *scan;
-
-        LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-        ENTRY;
-
-       list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-                if (scan->lll_super == lck)
-                        RETURN(scan);
-        }
-        RETURN(NULL);
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked for "top-to-bottom" delete, when lock destruction starts from the
- * top-lock, e.g., as a result of inode destruction.
- *
- * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
- * this is done separately elsewhere:
- *
- *     - for inode destruction, lov_object_delete() calls cl_object_kill() for
- *       each sub-object, purging its locks;
- *
- *     - in other cases (e.g., a fatal error with a top-lock) sub-locks are
- *       left in the cache.
- */
-static void lov_lock_delete(const struct lu_env *env,
-                            const struct cl_lock_slice *slice)
-{
-        struct lov_lock        *lck     = cl2lov_lock(slice);
-        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-        struct lov_lock_link   *link;
-        int                     rc;
-        int                     i;
-
-        LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
-        ENTRY;
-
-        for (i = 0; i < lck->lls_nr; ++i) {
-                struct lov_lock_sub *lls = &lck->lls_sub[i];
-                struct lovsub_lock  *lsl = lls->sub_lock;
-
-                if (lsl == NULL) /* already removed */
-                        continue;
-
-                rc = lov_sublock_lock(env, lck, lls, closure, NULL);
-                if (rc == CLO_REPEAT) {
-                        --i;
-                        continue;
-                }
-
-                LASSERT(rc == 0);
-                LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
-
-                if (lls->sub_flags & LSF_HELD)
-                        lov_sublock_release(env, lck, i, 1, 0);
-
-                link = lov_lock_link_find(env, lck, lsl);
-                LASSERT(link != NULL);
-                lov_lock_unlink(env, link, lsl);
-                LASSERT(lck->lls_sub[i].sub_lock == NULL);
-
-                lov_sublock_unlock(env, lsl, closure, NULL);
-        }
-
-        cl_lock_closure_fini(closure);
-        EXIT;
 }
 
 static int lov_lock_print(const struct lu_env *env, void *cookie,
@@ -1160,12 +305,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
                 struct lov_lock_sub *sub;
 
                 sub = &lck->lls_sub[i];
-                (*p)(env, cookie, "    %d %x: ", i, sub->sub_flags);
-                if (sub->sub_lock != NULL)
-                        cl_lock_print(env, cookie, p,
-                                      sub->sub_lock->lss_cl.cls_lock);
-                else
-                        (*p)(env, cookie, "---\n");
+               (*p)(env, cookie, "    %d %x: ", i, sub->sub_is_enqueued);
+               cl_lock_print(env, cookie, p, &sub->sub_lock);
         }
         return 0;
 }
@@ -1173,12 +314,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
 static const struct cl_lock_operations lov_lock_ops = {
         .clo_fini      = lov_lock_fini,
         .clo_enqueue   = lov_lock_enqueue,
-        .clo_wait      = lov_lock_wait,
-        .clo_use       = lov_lock_use,
-        .clo_unuse     = lov_lock_unuse,
         .clo_cancel    = lov_lock_cancel,
-        .clo_fits_into = lov_lock_fits_into,
-        .clo_delete    = lov_lock_delete,
         .clo_print     = lov_lock_print
 };
 
@@ -1186,15 +322,14 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
                        struct cl_lock *lock, const struct cl_io *io)
 {
        struct lov_lock *lck;
-       int result;
+       int result = 0;
 
        ENTRY;
 
-       if (lck != NULL) {
+       lck = lov_lock_sub_init(env, obj, lock);
+       if (!IS_ERR(lck))
                cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
-               result = lov_lock_sub_init(env, lck, io);
-       } else
-               result = -ENOMEM;
+       else
+               result = PTR_ERR(lck);
        RETURN(result);
 }
 
@@ -1228,22 +363,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
        OBD_SLAB_ALLOC_PTR_GFP(lck, lov_lock_kmem, GFP_NOFS);
        if (lck != NULL) {
                cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
-               lck->lls_orig = lock->cll_descr;
                result = 0;
        }
        RETURN(result);
 }
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-                                               struct cl_lock *parent)
-{
-        struct cl_lock_closure *closure;
-
-        closure = &lov_env_info(env)->lti_closure;
-       LASSERT(list_empty(&closure->clc_list));
-        cl_lock_closure_init(env, closure, parent, 1);
-        return closure;
-}
-
-
 /** @} lov */
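
Note on the new lov_lock flow in the hunks above: lov_lock_enqueue() walks the flat lls_sub[] array, enqueues each sub-lock under its own sub-environment, and marks it with sub_is_enqueued; lov_lock_cancel() walks the same array and cancels only the sub-locks that carry that mark, so a partially enqueued top-lock unwinds cleanly. The standalone sketch below models just that flag discipline; the names lov_model, sub_model and fake_enqueue are hypothetical stand-ins, not Lustre code, and the real enqueue/cancel go through cl_lock_enqueue()/cl_lock_cancel() as shown in the diff.

#include <stdio.h>

/* Hypothetical stand-ins for struct lov_lock / struct lov_lock_sub. */
struct sub_model {
	int sub_is_enqueued;		/* mirrors lls->sub_is_enqueued */
};

struct lov_model {
	int		 lls_nr;
	struct sub_model lls_sub[4];
};

/* Pretend per-stripe enqueue: fail on stripe 2 to show a partial enqueue. */
static int fake_enqueue(int i)
{
	return i == 2 ? -1 : 0;
}

/* Mirrors the new lov_lock_enqueue() loop: stop on the first error and
 * flag every sub-lock that was successfully enqueued. */
static int model_enqueue(struct lov_model *lck)
{
	int rc = 0;
	int i;

	for (i = 0; i < lck->lls_nr; i++) {
		rc = fake_enqueue(i);
		if (rc != 0)
			break;
		lck->lls_sub[i].sub_is_enqueued = 1;
	}
	return rc;
}

/* Mirrors the new lov_lock_cancel() loop: only flagged sub-locks are
 * cancelled, so stripes whose enqueue never happened are skipped. */
static void model_cancel(struct lov_model *lck)
{
	int i;

	for (i = 0; i < lck->lls_nr; i++) {
		if (!lck->lls_sub[i].sub_is_enqueued)
			continue;
		lck->lls_sub[i].sub_is_enqueued = 0;
		printf("cancel sub-lock %d\n", i);
	}
}

int main(void)
{
	struct lov_model lck = { .lls_nr = 4 };

	if (model_enqueue(&lck) != 0)
		model_cancel(&lck);	/* prints: cancel sub-lock 0 and 1 */
	return 0;
}

Compiled on its own, the sketch enqueues stripes 0 and 1, fails on stripe 2, and the cancel pass therefore touches only the two flagged stripes.
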
index 19a9bcf..61b5a9f 100644 (file)
@@ -314,8 +314,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
        LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
 
        lov_layout_wait(env, lov);
-
-       cl_locks_prune(env, &lov->lo_cl, 0);
        return 0;
 }
 
@@ -383,7 +381,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
                         struct lovsub_object *los = r0->lo_sub[i];
 
                         if (los != NULL) {
-                               cl_locks_prune(env, &los->lso_cl, 1);
+                               cl_object_prune(env, &los->lso_cl);
                                 /*
                                  * If top-level object is to be evicted from
                                  * the cache, so are its sub-objects.
@@ -392,7 +390,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
                        }
                }
        }
-       cl_locks_prune(env, &lov->lo_cl, 0);
        RETURN(0);
 }
 
@@ -725,7 +722,9 @@ static int lov_layout_change(const struct lu_env *unused,
        old_ops = &lov_dispatch[lov->lo_type];
        new_ops = &lov_dispatch[llt];
 
-       cl_object_prune(env, &lov->lo_cl);
+       result = cl_object_prune(env, &lov->lo_cl);
+       if (result != 0)
+               GOTO(out, result);
 
        result = old_ops->llo_delete(env, lov, &lov->u);
        if (result == 0) {
@@ -751,6 +750,7 @@ static int lov_layout_change(const struct lu_env *unused,
                }
        }
 
+out:
        cl_env_put(env, &refcheck);
        cl_env_reexit(cookie);
        RETURN(result);
@@ -830,7 +830,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
                GOTO(out, result = -EBUSY);
        }
 
-       lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
+       result = lov_layout_change(env, lov, conf);
+       lov->lo_layout_invalid = result != 0;
        EXIT;
 
 out:
index 7916d20..de8d6da 100644 (file)
@@ -64,411 +64,8 @@ static void lovsub_lock_fini(const struct lu_env *env,
         EXIT;
 }
 
-static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
-{
-       struct cl_lock *parent;
-
-       ENTRY;
-       parent = lov->lls_cl.cls_lock;
-       cl_lock_get(parent);
-       lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
-       cl_lock_mutex_get(env, parent);
-       EXIT;
-}
-
-static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
-{
-       struct cl_lock *parent;
-
-       ENTRY;
-       parent = lov->lls_cl.cls_lock;
-       cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
-       lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
-       cl_lock_put(env, parent);
-       EXIT;
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for lovsub layer, which
- * method is called whenever sub-lock state changes. Propagates state change
- * to the top-locks.
- */
-static void lovsub_lock_state(const struct lu_env *env,
-                              const struct cl_lock_slice *slice,
-                              enum cl_lock_state state)
-{
-        struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-        struct lov_lock_link *scan;
-
-        LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-        ENTRY;
-
-       list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-                struct lov_lock *lov    = scan->lll_super;
-                struct cl_lock  *parent = lov->lls_cl.cls_lock;
-
-                if (sub->lss_active != parent) {
-                        lovsub_parent_lock(env, lov);
-                        cl_lock_signal(env, parent);
-                        lovsub_parent_unlock(env, lov);
-                }
-        }
-        EXIT;
-}
-
-/**
- * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
- * asking parent lock.
- */
-static unsigned long lovsub_lock_weigh(const struct lu_env *env,
-                                       const struct cl_lock_slice *slice)
-{
-        struct lovsub_lock *lock = cl2lovsub_lock(slice);
-        struct lov_lock    *lov;
-        unsigned long       dumbbell;
-
-        ENTRY;
-
-        LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-       if (!list_empty(&lock->lss_parents)) {
-                /*
-                 * It is not clear whether all parents have to be asked and
-                 * their estimations summed, or it is enough to ask one. For
-                 * the current usages, one is always enough.
-                 */
-                lov = container_of(lock->lss_parents.next,
-                                   struct lov_lock_link, lll_list)->lll_super;
-
-                lovsub_parent_lock(env, lov);
-                dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
-                lovsub_parent_unlock(env, lov);
-        } else
-                dumbbell = 0;
-
-        RETURN(dumbbell);
-}
-
-/**
- * Maps start/end offsets within a stripe, to offsets within a file.
- */
-static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
-                                 struct lov_object *lov,
-                                 int stripe, struct cl_lock_descr *out)
-{
-        pgoff_t size; /* stripe size in pages */
-        pgoff_t skip; /* how many pages in every stripe are occupied by
-                       * "other" stripes */
-        pgoff_t start;
-        pgoff_t end;
-
-        ENTRY;
-        start = in->cld_start;
-        end   = in->cld_end;
-
-       if (lov->lo_lsm->lsm_stripe_count > 1) {
-               size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
-               skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
-
-                /* XXX overflow check here? */
-                start += start/size * skip + stripe * size;
-
-                if (end != CL_PAGE_EOF) {
-                        end += end/size * skip + stripe * size;
-                        /*
-                         * And check for overflow...
-                         */
-                        if (end < in->cld_end)
-                                end = CL_PAGE_EOF;
-                }
-        }
-        out->cld_start = start;
-        out->cld_end   = end;
-        EXIT;
-}
-
-/**
- * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
- * called in two ways:
- *
- *     - as part of receive call-back, when server returns granted extent to
- *       the client, and
- *
- *     - when top-lock finds existing sub-lock in the cache.
- *
- * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
- * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
- */
-int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
-                       struct lovsub_lock *sublock,
-                       const struct cl_lock_descr *d, int idx)
-{
-        struct cl_lock       *parent;
-        struct lovsub_object *subobj;
-        struct cl_lock_descr *pd;
-        struct cl_lock_descr *parent_descr;
-        int                   result;
-
-        parent       = lov->lls_cl.cls_lock;
-        parent_descr = &parent->cll_descr;
-        LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
-
-        subobj = cl2lovsub(sublock->lss_cl.cls_obj);
-        pd     = &lov_env_info(env)->lti_ldescr;
-
-        pd->cld_obj  = parent_descr->cld_obj;
-        pd->cld_mode = parent_descr->cld_mode;
-        pd->cld_gid  = parent_descr->cld_gid;
-        lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
-
-       /* LU-3027: only update extent of lock, plus the change in
-        * lovsub_lock_delete() that lock extent is modified after a sublock
-        * is canceled, we can make sure that the lock extent won't be updated
-        * any more. Therefore, lov_lock_fits_into() will always find feasible
-        * locks */
-        lov->lls_sub[idx].sub_got.cld_start = d->cld_start;
-        lov->lls_sub[idx].sub_got.cld_end = d->cld_end;
-        /*
-         * Notify top-lock about modification, if lock description changes
-         * materially.
-         */
-        if (!cl_lock_ext_match(parent_descr, pd))
-                result = cl_lock_modify(env, parent, pd);
-        else
-                result = 0;
-        return result;
-}
-
-static int lovsub_lock_modify(const struct lu_env *env,
-                              const struct cl_lock_slice *s,
-                              const struct cl_lock_descr *d)
-{
-        struct lovsub_lock   *lock   = cl2lovsub_lock(s);
-        struct lov_lock_link *scan;
-        struct lov_lock      *lov;
-        int result                   = 0;
-
-        ENTRY;
-
-        LASSERT(cl_lock_mode_match(d->cld_mode,
-                                   s->cls_lock->cll_descr.cld_mode));
-       list_for_each_entry(scan, &lock->lss_parents, lll_list) {
-                int rc;
-
-                lov = scan->lll_super;
-                lovsub_parent_lock(env, lov);
-                rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
-                lovsub_parent_unlock(env, lov);
-                result = result ?: rc;
-        }
-        RETURN(result);
-}
-
-static int lovsub_lock_closure(const struct lu_env *env,
-                               const struct cl_lock_slice *slice,
-                               struct cl_lock_closure *closure)
-{
-        struct lovsub_lock   *sub;
-        struct cl_lock       *parent;
-        struct lov_lock_link *scan;
-        int                   result;
-
-        LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-        ENTRY;
-
-        sub    = cl2lovsub_lock(slice);
-        result = 0;
-
-       list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-                parent = scan->lll_super->lls_cl.cls_lock;
-                result = cl_lock_closure_build(env, parent, closure);
-                if (result != 0)
-                        break;
-        }
-        RETURN(result);
-}
-
-/**
- * A helper function for lovsub_lock_delete() that deals with a given parent
- * top-lock.
- */
-static int lovsub_lock_delete_one(const struct lu_env *env,
-                                  struct cl_lock *child, struct lov_lock *lov)
-{
-        struct cl_lock *parent;
-        int             result;
-        ENTRY;
-
-        parent = lov->lls_cl.cls_lock;
-        if (parent->cll_error)
-                RETURN(0);
-
-        result = 0;
-       lov->lls_ever_canceled = 1;
-        switch (parent->cll_state) {
-       case CLS_ENQUEUED:
-               /* See LU-1355 for the case that a glimpse lock is
-                * interrupted by signal */
-               LASSERT(parent->cll_flags & CLF_CANCELLED);
-               break;
-        case CLS_QUEUING:
-        case CLS_FREEING:
-                cl_lock_signal(env, parent);
-                break;
-        case CLS_INTRANSIT:
-                /*
-                 * Here lies a problem: a sub-lock is canceled while top-lock
-                 * is being unlocked. Top-lock cannot be moved into CLS_NEW
-                 * state, because unlocking has to succeed eventually by
-                 * placing lock into CLS_CACHED (or failing it), see
-                 * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
-                 * state, because lov maintains an invariant that all
-                 * sub-locks exist in CLS_CACHED (this allows cached top-lock
-                 * to be reused immediately). Nor can we wait for top-lock
-                 * state to change, because this can be synchronous to the
-                 * current thread.
-                 *
-                 * We know for sure that lov_lock_unuse() will be called at
-                 * least one more time to finish un-using, so leave a mark on
-                 * the top-lock, that will be seen by the next call to
-                 * lov_lock_unuse().
-                 */
-                if (cl_lock_is_intransit(parent))
-                        lov->lls_cancel_race = 1;
-                break;
-        case CLS_CACHED:
-                /*
-                 * if a sub-lock is canceled move its top-lock into CLS_NEW
-                 * state to preserve an invariant that a top-lock in
-                 * CLS_CACHED is immediately ready for re-use (i.e., has all
-                 * sub-locks), and so that next attempt to re-use the top-lock
-                 * enqueues missing sub-lock.
-                 */
-                cl_lock_state_set(env, parent, CLS_NEW);
-                /* fall through */
-        case CLS_NEW:
-                /*
-                 * if last sub-lock is canceled, destroy the top-lock (which
-                 * is now `empty') proactively.
-                 */
-                if (lov->lls_nr_filled == 0) {
-                        /* ... but unfortunately, this cannot be done easily,
-                         * as cancellation of a top-lock might acquire mutices
-                         * of its other sub-locks, violating lock ordering,
-                         * see cl_lock_{cancel,delete}() preconditions.
-                         *
-                         * To work around this, the mutex of this sub-lock is
-                         * released, top-lock is destroyed, and sub-lock mutex
-                         * acquired again. The list of parents has to be
-                         * re-scanned from the beginning after this.
-                         *
-                         * Only do this if no mutices other than on @child and
-                         * @parent are held by the current thread.
-                         *
-                         * TODO: The lock modal here is too complex, because
-                         * the lock may be canceled and deleted by voluntarily:
-                         *    cl_lock_request
-                         *      -> osc_lock_enqueue_wait
-                         *        -> osc_lock_cancel_wait
-                         *          -> cl_lock_delete
-                         *            -> lovsub_lock_delete
-                         *              -> cl_lock_cancel/delete
-                         *                -> ...
-                         *
-                         * The better choice is to spawn a kernel thread for
-                         * this purpose. -jay
-                         */
-                        if (cl_lock_nr_mutexed(env) == 2) {
-                                cl_lock_mutex_put(env, child);
-                                cl_lock_cancel(env, parent);
-                                cl_lock_delete(env, parent);
-                                result = 1;
-                        }
-                }
-                break;
-        case CLS_HELD:
-                CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
-               /* falling through */
-        default:
-                CERROR("Impossible state: %d\n", parent->cll_state);
-                LBUG();
-                break;
-        }
-
-        RETURN(result);
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
- */
-static void lovsub_lock_delete(const struct lu_env *env,
-                               const struct cl_lock_slice *slice)
-{
-        struct cl_lock     *child = slice->cls_lock;
-        struct lovsub_lock *sub   = cl2lovsub_lock(slice);
-        int restart;
-
-        LASSERT(cl_lock_is_mutexed(child));
-
-        ENTRY;
-        /*
-         * Destruction of a sub-lock might take multiple iterations, because
-         * when the last sub-lock of a given top-lock is deleted, top-lock is
-         * canceled proactively, and this requires to release sub-lock
-         * mutex. Once sub-lock mutex has been released, list of its parents
-         * has to be re-scanned from the beginning.
-         */
-        do {
-                struct lov_lock      *lov;
-                struct lov_lock_link *scan;
-                struct lov_lock_link *temp;
-
-                restart = 0;
-               list_for_each_entry_safe(scan, temp,
-                                             &sub->lss_parents, lll_list) {
-                        lov     = scan->lll_super;
-                        lovsub_parent_lock(env, lov);
-                        lov_lock_unlink(env, scan, sub);
-                        restart = lovsub_lock_delete_one(env, child, lov);
-                        lovsub_parent_unlock(env, lov);
-
-                        if (restart) {
-                                cl_lock_mutex_get(env, child);
-                                break;
-                        }
-               }
-        } while (restart);
-        EXIT;
-}
-
-static int lovsub_lock_print(const struct lu_env *env, void *cookie,
-                             lu_printer_t p, const struct cl_lock_slice *slice)
-{
-        struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-        struct lov_lock      *lov;
-        struct lov_lock_link *scan;
-
-       list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-                lov = scan->lll_super;
-                (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
-                if (lov != NULL)
-                        cl_lock_descr_print(env, cookie, p,
-                                            &lov->lls_cl.cls_lock->cll_descr);
-                (*p)(env, cookie, "] ");
-        }
-        return 0;
-}
-
 static const struct cl_lock_operations lovsub_lock_ops = {
         .clo_fini    = lovsub_lock_fini,
-        .clo_state   = lovsub_lock_state,
-        .clo_delete  = lovsub_lock_delete,
-        .clo_modify  = lovsub_lock_modify,
-        .clo_closure = lovsub_lock_closure,
-        .clo_weigh   = lovsub_lock_weigh,
-        .clo_print   = lovsub_lock_print
 };
 
 int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
index a9945a9..94eae55 100644 (file)
@@ -163,7 +163,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
 
         io->ci_type = iot;
        INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
-       INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
        INIT_LIST_HEAD(&io->ci_lockset.cls_done);
        INIT_LIST_HEAD(&io->ci_layers);
 
@@ -242,45 +241,11 @@ int cl_io_rw_init(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(cl_io_rw_init);
 
-static inline const struct lu_fid *
-cl_lock_descr_fid(const struct cl_lock_descr *descr)
-{
-        return lu_object_fid(&descr->cld_obj->co_lu);
-}
-
 static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
                               const struct cl_lock_descr *d1)
 {
-        return lu_fid_cmp(cl_lock_descr_fid(d0), cl_lock_descr_fid(d1)) ?:
-                __diff_normalize(d0->cld_start, d1->cld_start);
-}
-
-static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
-                             const struct cl_lock_descr *d1)
-{
-        int ret;
-
-        ret = lu_fid_cmp(cl_lock_descr_fid(d0), cl_lock_descr_fid(d1));
-        if (ret)
-                return ret;
-        if (d0->cld_end < d1->cld_start)
-                return -1;
-        if (d0->cld_start > d0->cld_end)
-                return 1;
-        return 0;
-}
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
-                                const struct cl_lock_descr *d1)
-{
-        d0->cld_start = min(d0->cld_start, d1->cld_start);
-        d0->cld_end = max(d0->cld_end, d1->cld_end);
-
-        if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
-                d0->cld_mode = CLM_WRITE;
-
-        if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
-                d0->cld_mode = CLM_GROUP;
+       return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
+                         lu_object_fid(&d1->cld_obj->co_lu));
 }
 
 /*
@@ -329,35 +294,35 @@ static void cl_io_locks_sort(struct cl_io *io)
        EXIT;
 }
 
-/**
- * Check whether \a queue contains locks matching \a need.
- *
- * \retval +ve there is a matching lock in the \a queue
- * \retval   0 there are no matching locks in the \a queue
- */
-int cl_queue_match(const struct list_head *queue,
-                   const struct cl_lock_descr *need)
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+                               const struct cl_lock_descr *d1)
 {
-       struct cl_io_lock_link *scan;
-       ENTRY;
+       d0->cld_start = min(d0->cld_start, d1->cld_start);
+       d0->cld_end = max(d0->cld_end, d1->cld_end);
 
-       list_for_each_entry(scan, queue, cill_linkage) {
-               if (cl_lock_descr_match(&scan->cill_descr, need))
-                       RETURN(+1);
-       }
-       RETURN(0);
+       if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+               d0->cld_mode = CLM_WRITE;
+
+       if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+               d0->cld_mode = CLM_GROUP;
 }
-EXPORT_SYMBOL(cl_queue_match);
 
-static int cl_queue_merge(const struct list_head *queue,
-                          const struct cl_lock_descr *need)
+static int cl_lockset_merge(const struct cl_lockset *set,
+                           const struct cl_lock_descr *need)
 {
        struct cl_io_lock_link *scan;
-       ENTRY;
 
-       list_for_each_entry(scan, queue, cill_linkage) {
-               if (cl_lock_descr_cmp(&scan->cill_descr, need))
+       ENTRY;
+       list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
+               if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
                        continue;
+
+               /* Merge locks for the same object, because the ldlm lock
+                * server may expand the lock extent; otherwise there is a
+                * deadlock case if two conflicting locks are queued for the
+                * same object and the lock server expands one lock to overlap
+                * the other. The side effect is that it can generate a
+                * multi-stripe lock that may cause cascading problems */
                cl_lock_descr_merge(&scan->cill_descr, need);
                CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
                       scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
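
To make the merge rule added in this hunk concrete: cl_lock_descr_merge() takes the union of the two extents and promotes the mode to CLM_WRITE or CLM_GROUP when either descriptor requires it, and cl_lockset_merge() now applies it to every todo lock on the same object. The standalone sketch below reproduces only that arithmetic with hypothetical stand-in types (model_descr, MODEL_READ/WRITE/GROUP) in place of the real cl_lock_descr; for example, [0, 10] read merged with [5, 20] write becomes [0, 20] write.

#include <stdio.h>

/* Hypothetical stand-ins for the kernel types used by the patch. */
enum model_mode { MODEL_READ, MODEL_WRITE, MODEL_GROUP };

struct model_descr {
	unsigned long	start;
	unsigned long	end;
	enum model_mode	mode;
};

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Mirrors cl_lock_descr_merge(): union of extents plus mode promotion. */
static void model_descr_merge(struct model_descr *d0,
			      const struct model_descr *d1)
{
	d0->start = MIN(d0->start, d1->start);
	d0->end = MAX(d0->end, d1->end);

	if (d1->mode == MODEL_WRITE && d0->mode != MODEL_WRITE)
		d0->mode = MODEL_WRITE;
	if (d1->mode == MODEL_GROUP && d0->mode != MODEL_GROUP)
		d0->mode = MODEL_GROUP;
}

int main(void)
{
	struct model_descr d0 = { 0, 10, MODEL_READ };
	struct model_descr d1 = { 5, 20, MODEL_WRITE };

	model_descr_merge(&d0, &d1);
	/* prints: merged [0, 20] mode 1 (write) */
	printf("merged [%lu, %lu] mode %d\n", d0.start, d0.end, d0.mode);
	return 0;
}
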
@@ -367,91 +332,21 @@ static int cl_queue_merge(const struct list_head *queue,
        RETURN(0);
 }
 
-static int cl_lockset_match(const struct cl_lockset *set,
-                            const struct cl_lock_descr *need)
-{
-        return cl_queue_match(&set->cls_curr, need) ||
-               cl_queue_match(&set->cls_done, need);
-}
-
-static int cl_lockset_merge(const struct cl_lockset *set,
-                            const struct cl_lock_descr *need)
-{
-        return cl_queue_merge(&set->cls_todo, need) ||
-               cl_lockset_match(set, need);
-}
-
-static int cl_lockset_lock_one(const struct lu_env *env,
-                               struct cl_io *io, struct cl_lockset *set,
-                               struct cl_io_lock_link *link)
-{
-        struct cl_lock *lock;
-        int             result;
-
-        ENTRY;
-
-       lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
-
-        if (!IS_ERR(lock)) {
-                link->cill_lock = lock;
-               list_move(&link->cill_linkage, &set->cls_curr);
-                if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
-                        result = cl_wait(env, lock);
-                        if (result == 0)
-                               list_move(&link->cill_linkage, &set->cls_done);
-                } else
-                        result = 0;
-        } else
-                result = PTR_ERR(lock);
-        RETURN(result);
-}
-
-static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
-                              struct cl_io_lock_link *link)
-{
-        struct cl_lock *lock = link->cill_lock;
-
-        ENTRY;
-       list_del_init(&link->cill_linkage);
-        if (lock != NULL) {
-                cl_lock_release(env, lock, "io", io);
-                link->cill_lock = NULL;
-        }
-        if (link->cill_fini != NULL)
-                link->cill_fini(env, link);
-        EXIT;
-}
-
 static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
-                           struct cl_lockset *set)
+                          struct cl_lockset *set)
 {
 {
-       struct cl_io_lock_link  *link;
-       struct cl_io_lock_link  *temp;
-       struct cl_lock          *lock;
+       struct cl_io_lock_link *link;
+       struct cl_io_lock_link *temp;
        int result;
 
        ENTRY;
        result = 0;
        list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
-               if (!cl_lockset_match(set, &link->cill_descr)) {
-                       /* XXX some locking to guarantee that locks aren't
-                        * expanded in between. */
-                       result = cl_lockset_lock_one(env, io, set, link);
-                       if (result != 0)
-                               break;
-               } else
-                       cl_lock_link_fini(env, io, link);
-       }
-       if (result == 0) {
-               list_for_each_entry_safe(link, temp, &set->cls_curr,
-                                        cill_linkage) {
-                       lock = link->cill_lock;
-                       result = cl_wait(env, lock);
-                       if (result == 0)
-                               list_move(&link->cill_linkage, &set->cls_done);
-                       else
-                               break;
-               }
+               result = cl_lock_request(env, io, &link->cill_lock);
+               if (result < 0)
+                       break;
+
+               list_move(&link->cill_linkage, &set->cls_done);
        }
        RETURN(result);
 }
@@ -509,16 +404,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
         ENTRY;
         set = &io->ci_lockset;
 
-       list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
-               cl_lock_link_fini(env, io, link);
-
-       list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
-               cl_lock_link_fini(env, io, link);
+       list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+               list_del_init(&link->cill_linkage);
+               if (link->cill_fini != NULL)
+                       link->cill_fini(env, link);
+       }
 
        list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
-               cl_unuse(env, link->cill_lock);
-               cl_lock_link_fini(env, io, link);
+               list_del_init(&link->cill_linkage);
+               cl_lock_release(env, &link->cill_lock);
+               if (link->cill_fini != NULL)
+                       link->cill_fini(env, link);
        }
+
        cl_io_for_each_reverse(scan, io) {
                if (scan->cis_iop->op[io->ci_type].cio_unlock != NULL)
                        scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
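
With cls_curr removed, the per-IO locking in the hunks above reduces to two passes: cl_lockset_lock() requests every link on cls_todo and moves it straight to cls_done, and cl_io_unlock() later releases whatever sits on cls_done and simply finalizes any leftovers still on cls_todo. The sketch below models that two-list flow with hypothetical stand-ins (link_model, fake_request) and a plain array instead of list_head; it only illustrates the ordering, not the real cl_lock_request()/cl_lock_release() API.

#include <stdio.h>

#define NR_LOCKS 3

/* Hypothetical stand-in for a cl_io_lock_link: which list it sits on. */
enum list_id { LIST_TODO, LIST_DONE, LIST_NONE };

struct link_model {
	int		id;
	enum list_id	where;
};

/* Pretend cl_lock_request(): always succeeds in this model. */
static int fake_request(struct link_model *link)
{
	printf("request lock %d\n", link->id);
	return 0;
}

/* Mirrors the simplified cl_lockset_lock(): every todo link is requested
 * and moved straight to the done list; stop on the first failure. */
static int model_lockset_lock(struct link_model *links, int nr)
{
	int i, rc = 0;

	for (i = 0; i < nr; i++) {
		if (links[i].where != LIST_TODO)
			continue;
		rc = fake_request(&links[i]);
		if (rc < 0)
			break;
		links[i].where = LIST_DONE;
	}
	return rc;
}

/* Mirrors the simplified cl_io_unlock(): leftover todo links are just
 * finalized, done links are released and then finalized. */
static void model_io_unlock(struct link_model *links, int nr)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (links[i].where == LIST_DONE)
			printf("release lock %d\n", links[i].id);
		links[i].where = LIST_NONE;	/* cill_fini() stand-in */
	}
}

int main(void)
{
	struct link_model links[NR_LOCKS] = {
		{ 0, LIST_TODO }, { 1, LIST_TODO }, { 2, LIST_TODO },
	};

	if (model_lockset_lock(links, NR_LOCKS) == 0)
		model_io_unlock(links, NR_LOCKS);
	return 0;
}
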
@@ -642,21 +540,21 @@ static void cl_free_io_lock_link(const struct lu_env *env,
 int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
                          struct cl_lock_descr *descr)
 {
-        struct cl_io_lock_link *link;
-        int result;
+       struct cl_io_lock_link *link;
+       int result;
 
 
-        ENTRY;
-        OBD_ALLOC_PTR(link);
-        if (link != NULL) {
-                link->cill_descr     = *descr;
-                link->cill_fini      = cl_free_io_lock_link;
-                result = cl_io_lock_add(env, io, link);
-                if (result) /* lock match */
-                        link->cill_fini(env, link);
-        } else
-                result = -ENOMEM;
+       ENTRY;
+       OBD_ALLOC_PTR(link);
+       if (link != NULL) {
+               link->cill_descr = *descr;
+               link->cill_fini  = cl_free_io_lock_link;
+               result = cl_io_lock_add(env, io, link);
+               if (result) /* lock match */
+                       link->cill_fini(env, link);
+       } else
+               result = -ENOMEM;
 
 
-        RETURN(result);
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_io_lock_alloc_add);
 
@@ -1577,6 +1475,7 @@ void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
                     void (*end)(const struct lu_env *, struct cl_sync_io *))
 {
        ENTRY;
+       memset(anchor, 0, sizeof(*anchor));
        init_waitqueue_head(&anchor->csi_waitq);
        atomic_set(&anchor->csi_sync_nr, nr);
        atomic_set(&anchor->csi_barrier, nr > 0);
index 6aaf676..e01f5c8 100644 (file)
 #include <cl_object.h>
 #include "cl_internal.h"
 
-/** Lock class of cl_lock::cll_guard */
-static struct lock_class_key cl_lock_guard_class;
-static struct kmem_cache *cl_lock_kmem;
-
-static struct lu_kmem_descr cl_lock_caches[] = {
-        {
-                .ckd_cache = &cl_lock_kmem,
-                .ckd_name  = "cl_lock_kmem",
-                .ckd_size  = sizeof (struct cl_lock)
-        },
-        {
-                .ckd_cache = NULL
-        }
-};
-
-#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
-#define CS_LOCK_INC(o, item) \
-       atomic_inc(&cl_object_site(o)->cs_locks.cs_stats[CS_##item])
-#define CS_LOCK_DEC(o, item) \
-       atomic_dec(&cl_object_site(o)->cs_locks.cs_stats[CS_##item])
-#define CS_LOCKSTATE_INC(o, state) \
-       atomic_inc(&cl_object_site(o)->cs_locks_state[state])
-#define CS_LOCKSTATE_DEC(o, state) \
-       atomic_dec(&cl_object_site(o)->cs_locks_state[state])
-#else
-#define CS_LOCK_INC(o, item)
-#define CS_LOCK_DEC(o, item)
-#define CS_LOCKSTATE_INC(o, state)
-#define CS_LOCKSTATE_DEC(o, state)
-#endif
-
-/**
- * Basic lock invariant that is maintained at all times. Caller either has a
- * reference to \a lock, or somehow assures that \a lock cannot be freed.
- *
- * \see cl_lock_invariant()
- */
-static int cl_lock_invariant_trusted(const struct lu_env *env,
-                                     const struct cl_lock *lock)
-{
-        return  ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
-               atomic_read(&lock->cll_ref) >= lock->cll_holds &&
-                lock->cll_holds >= lock->cll_users &&
-                lock->cll_holds >= 0 &&
-                lock->cll_users >= 0 &&
-                lock->cll_depth >= 0;
-}
-
-/**
- * Stronger lock invariant, checking that caller has a reference on a lock.
- *
- * \see cl_lock_invariant_trusted()
- */
-static int cl_lock_invariant(const struct lu_env *env,
-                             const struct cl_lock *lock)
-{
-        int result;
-
-       result = atomic_read(&lock->cll_ref) > 0 &&
-                cl_lock_invariant_trusted(env, lock);
-       if (!result && env != NULL)
-               CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n");
-       return result;
-}
-
-/**
- * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
- */
-static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
-{
-        return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
-}
-
-/**
- * Returns a set of counters for this lock, depending on a lock nesting.
- */
-static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
-                                                   const struct cl_lock *lock)
-{
-        struct cl_thread_info *info;
-        enum clt_nesting_level nesting;
-
-        info = cl_env_info(env);
-        nesting = cl_lock_nesting(lock);
-        LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
-        return &info->clt_counters[nesting];
-}
-
 static void cl_lock_trace0(int level, const struct lu_env *env,
-                           const char *prefix, const struct cl_lock *lock,
-                           const char *func, const int line)
+                          const char *prefix, const struct cl_lock *lock,
+                          const char *func, const int line)
 {
 {
-        struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
-        CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)"
-                      "(%p/%d/%d) at %s():%d\n",
-              prefix, lock, atomic_read(&lock->cll_ref),
-               lock->cll_guarder, lock->cll_depth,
-               lock->cll_state, lock->cll_error, lock->cll_holds,
-               lock->cll_users, lock->cll_flags,
-               env, h->coh_nesting, cl_lock_nr_mutexed(env),
-               func, line);
+       struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
+       CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
+              prefix, lock, env, h->coh_nesting, func, line);
 }
 #define cl_lock_trace(level, env, prefix, lock)                         \
         cl_lock_trace0(level, env, prefix, lock, __FUNCTION__, __LINE__)
 
-#define RETIP ((unsigned long)__builtin_return_address(0))
-
-#ifdef CONFIG_LOCKDEP
-static struct lock_class_key cl_lock_key;
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{
-        lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
-}
-
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-                                    struct cl_lock *lock, __u32 enqflags)
-{
-        cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
-        lock_map_acquire(&lock->dep_map);
-}
-
-static void cl_lock_lockdep_release(const struct lu_env *env,
-                                    struct cl_lock *lock)
-{
-        cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
-       lock_map_release(&lock->dep_map);
-}
-
-#else /* !CONFIG_LOCKDEP */
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{}
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-                                    struct cl_lock *lock, __u32 enqflags)
-{}
-static void cl_lock_lockdep_release(const struct lu_env *env,
-                                    struct cl_lock *lock)
-{}
-
-#endif /* !CONFIG_LOCKDEP */
-
 /**
  * Adds lock slice to the compound lock.
  *
@@ -213,1980 +82,211 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
 }
 EXPORT_SYMBOL(cl_lock_slice_add);
 
-/**
- * Returns true iff a lock with the mode \a has provides at least the same
- * guarantees as a lock with the mode \a need.
- */
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
-{
-        LINVRNT(need == CLM_READ || need == CLM_WRITE ||
-                need == CLM_PHANTOM || need == CLM_GROUP);
-        LINVRNT(has == CLM_READ || has == CLM_WRITE ||
-                has == CLM_PHANTOM || has == CLM_GROUP);
-        CLASSERT(CLM_PHANTOM < CLM_READ);
-        CLASSERT(CLM_READ < CLM_WRITE);
-        CLASSERT(CLM_WRITE < CLM_GROUP);
-
-        if (has != CLM_GROUP)
-                return need <= has;
-        else
-                return need == has;
-}
-EXPORT_SYMBOL(cl_lock_mode_match);
-
-/**
- * Returns true iff extent portions of lock descriptions match.
- */
-int cl_lock_ext_match(const struct cl_lock_descr *has,
-                      const struct cl_lock_descr *need)
-{
-        return
-                has->cld_start <= need->cld_start &&
-                has->cld_end >= need->cld_end &&
-                cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
-                (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
-}
-EXPORT_SYMBOL(cl_lock_ext_match);
-
-/**
- * Returns true iff a lock with the description \a has provides at least the
- * same guarantees as a lock with the description \a need.
- */
-int cl_lock_descr_match(const struct cl_lock_descr *has,
-                        const struct cl_lock_descr *need)
-{
-        return
-                cl_object_same(has->cld_obj, need->cld_obj) &&
-                cl_lock_ext_match(has, need);
-}
-EXPORT_SYMBOL(cl_lock_descr_match);
-
-static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
 {
-        struct cl_object *obj = lock->cll_descr.cld_obj;
+       ENTRY;
 
-        LINVRNT(!cl_lock_is_mutexed(lock));
+       cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
 
-       ENTRY;
-       cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
        while (!list_empty(&lock->cll_layers)) {
                struct cl_lock_slice *slice;
 
                slice = list_entry(lock->cll_layers.next,
-                                  struct cl_lock_slice, cls_linkage);
+                               struct cl_lock_slice, cls_linkage);
                list_del_init(lock->cll_layers.next);
                slice->cls_ops->clo_fini(env, slice);
        }
-       CS_LOCK_DEC(obj, total);
-       CS_LOCKSTATE_DEC(obj, lock->cll_state);
-       lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock);
-        cl_object_put(env, obj);
-        lu_ref_fini(&lock->cll_reference);
-        lu_ref_fini(&lock->cll_holders);
-       mutex_destroy(&lock->cll_guard);
-        OBD_SLAB_FREE_PTR(lock, cl_lock_kmem);
-        EXIT;
-}
-
-/**
- * Releases a reference on a lock.
- *
- * When last reference is released, lock is returned to the cache, unless it
- * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
- * immediately.
- *
- * \see cl_object_put(), cl_page_put()
- */
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
-{
-        struct cl_object        *obj;
-
-        LINVRNT(cl_lock_invariant(env, lock));
-        ENTRY;
-        obj = lock->cll_descr.cld_obj;
-        LINVRNT(obj != NULL);
-
-        CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
-              atomic_read(&lock->cll_ref), lock, RETIP);
-
-       if (atomic_dec_and_test(&lock->cll_ref)) {
-                if (lock->cll_state == CLS_FREEING) {
-                       LASSERT(list_empty(&lock->cll_linkage));
-                        cl_lock_free(env, lock);
-                }
-               CS_LOCK_DEC(obj, busy);
-        }
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_put);
-
-/**
- * Acquires an additional reference to a lock.
- *
- * This can be called only by caller already possessing a reference to \a
- * lock.
- *
- * \see cl_object_get(), cl_page_get()
- */
-void cl_lock_get(struct cl_lock *lock)
-{
-        LINVRNT(cl_lock_invariant(NULL, lock));
-        CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
-              atomic_read(&lock->cll_ref), lock, RETIP);
-       atomic_inc(&lock->cll_ref);
-}
-EXPORT_SYMBOL(cl_lock_get);
-
-/**
- * Acquires a reference to a lock.
- *
- * This is much like cl_lock_get(), except that this function can be used to
- * acquire initial reference to the cached lock. Caller has to deal with all
- * possible races. Use with care!
- *
- * \see cl_page_get_trust()
- */
-void cl_lock_get_trust(struct cl_lock *lock)
-{
-        CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
-              atomic_read(&lock->cll_ref), lock, RETIP);
-       if (atomic_inc_return(&lock->cll_ref) == 1)
-               CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
-}
-EXPORT_SYMBOL(cl_lock_get_trust);
-
-/**
- * Helper function destroying the lock that wasn't completely initialized.
- *
- * Other threads can acquire references to the top-lock through its
- * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
- */
-static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
-{
-        cl_lock_mutex_get(env, lock);
-        cl_lock_cancel(env, lock);
-        cl_lock_delete(env, lock);
-        cl_lock_mutex_put(env, lock);
-        cl_lock_put(env, lock);
+       POISON(lock, 0x5a, sizeof(*lock));
+       EXIT;
 }
+EXPORT_SYMBOL(cl_lock_fini);
 
-static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
-                                    struct cl_object *obj,
-                                    const struct cl_io *io,
-                                    const struct cl_lock_descr *descr)
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+                const struct cl_io *io)
 {
-       struct cl_lock          *lock;
-       struct lu_object_header *head;
-
+       struct cl_object *obj = lock->cll_descr.cld_obj;
+       struct cl_object *scan;
+       int result = 0;
        ENTRY;
-       OBD_SLAB_ALLOC_PTR_GFP(lock, cl_lock_kmem, GFP_NOFS);
-       if (lock != NULL) {
-               atomic_set(&lock->cll_ref, 1);
-               lock->cll_descr = *descr;
-               lock->cll_state = CLS_NEW;
-               cl_object_get(obj);
-               lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock",
-                                    lock);
-               INIT_LIST_HEAD(&lock->cll_layers);
-               INIT_LIST_HEAD(&lock->cll_linkage);
-               INIT_LIST_HEAD(&lock->cll_inclosure);
-               lu_ref_init(&lock->cll_reference);
-               lu_ref_init(&lock->cll_holders);
-               mutex_init(&lock->cll_guard);
-               lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
-               init_waitqueue_head(&lock->cll_wq);
-               head = obj->co_lu.lo_header;
-               CS_LOCKSTATE_INC(obj, CLS_NEW);
-               CS_LOCK_INC(obj, total);
-               CS_LOCK_INC(obj, create);
-               cl_lock_lockdep_init(lock);
-               list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) {
-                       int err;
-
-                       err = obj->co_ops->coo_lock_init(env, obj, lock, io);
-                       if (err != 0) {
-                               cl_lock_finish(env, lock);
-                               lock = ERR_PTR(err);
-                               break;
-                       }
-               }
-       } else
-               lock = ERR_PTR(-ENOMEM);
-       RETURN(lock);
-}
-
-/**
- * Transfer the lock into INTRANSIT state and return the original state.
- *
- * \pre  state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
- * \post state: CLS_INTRANSIT
- * \see CLS_INTRANSIT
- */
-enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
-                                    struct cl_lock *lock)
-{
-       enum cl_lock_state state = lock->cll_state;
-
-       LASSERT(cl_lock_is_mutexed(lock));
-       LASSERT(state != CLS_INTRANSIT);
-       LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
-                "Malformed lock state %d.\n", state);
-
-       cl_lock_state_set(env, lock, CLS_INTRANSIT);
-       lock->cll_intransit_owner = current;
-       cl_lock_hold_add(env, lock, "intransit", current);
-       return state;
-}
-EXPORT_SYMBOL(cl_lock_intransit);
-
-/**
- *  Exit the intransit state and restore the lock state to the original state
- */
-void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
-                      enum cl_lock_state state)
-{
-       LASSERT(cl_lock_is_mutexed(lock));
-       LASSERT(lock->cll_state == CLS_INTRANSIT);
-       LASSERT(state != CLS_INTRANSIT);
-       LASSERT(lock->cll_intransit_owner == current);
-
-       lock->cll_intransit_owner = NULL;
-       cl_lock_state_set(env, lock, state);
-       cl_lock_unhold(env, lock, "intransit", current);
-}
-EXPORT_SYMBOL(cl_lock_extransit);
-
-/**
- * Checking whether the lock is intransit state
- */
-int cl_lock_is_intransit(struct cl_lock *lock)
-{
-       LASSERT(cl_lock_is_mutexed(lock));
-       return lock->cll_state == CLS_INTRANSIT &&
-              lock->cll_intransit_owner != current;
-}
-EXPORT_SYMBOL(cl_lock_is_intransit);
-/**
- * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
- * truncate and O_APPEND cannot be reused for read/non-append-write, as they
- * cover multiple stripes and can trigger cascading timeouts.
- */
-static int cl_lock_fits_into(const struct lu_env *env,
-                             const struct cl_lock *lock,
-                             const struct cl_lock_descr *need,
-                             const struct cl_io *io)
-{
-        const struct cl_lock_slice *slice;
-
-        LINVRNT(cl_lock_invariant_trusted(env, lock));
-        ENTRY;
-       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_ops->clo_fits_into != NULL &&
-                    !slice->cls_ops->clo_fits_into(env, slice, need, io))
-                        RETURN(0);
-        }
-        RETURN(1);
-}
-
-static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
-                                      struct cl_object *obj,
-                                      const struct cl_io *io,
-                                      const struct cl_lock_descr *need)
-{
-        struct cl_lock          *lock;
-        struct cl_object_header *head;
-
-        ENTRY;
-
-       head = cl_object_header(obj);
-       assert_spin_locked(&head->coh_lock_guard);
-       CS_LOCK_INC(obj, lookup);
-       list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
-               int matched;
-
-                matched = cl_lock_ext_match(&lock->cll_descr, need) &&
-                          lock->cll_state < CLS_FREEING &&
-                          lock->cll_error == 0 &&
-                          !(lock->cll_flags & CLF_CANCELLED) &&
-                          cl_lock_fits_into(env, lock, need, io);
-                CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
-                       PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
-                       matched);
-                if (matched) {
-                        cl_lock_get_trust(lock);
-                       CS_LOCK_INC(obj, hit);
-                        RETURN(lock);
-                }
-        }
-        RETURN(NULL);
-}
-
-/**
- * Returns a lock matching description \a need.
- *
- * This is the main entry point into the cl_lock caching interface. First, a
- * cache (implemented as a per-object linked list) is consulted. If lock is
- * found there, it is returned immediately. Otherwise new lock is allocated
- * and returned. In any case, additional reference to lock is acquired.
- *
- * \see cl_object_find(), cl_page_find()
- */
-static struct cl_lock *cl_lock_find(const struct lu_env *env,
-                                    const struct cl_io *io,
-                                    const struct cl_lock_descr *need)
-{
-        struct cl_object_header *head;
-        struct cl_object        *obj;
-        struct cl_lock          *lock;
-
-        ENTRY;
-
-        obj  = need->cld_obj;
-        head = cl_object_header(obj);
-
-       spin_lock(&head->coh_lock_guard);
-       lock = cl_lock_lookup(env, obj, io, need);
-       spin_unlock(&head->coh_lock_guard);
-
-       if (lock == NULL) {
-               lock = cl_lock_alloc(env, obj, io, need);
-               if (!IS_ERR(lock)) {
-                       struct cl_lock *ghost;
-
-                       spin_lock(&head->coh_lock_guard);
-                       ghost = cl_lock_lookup(env, obj, io, need);
-                       if (ghost == NULL) {
-                               cl_lock_get_trust(lock);
-                               list_add_tail(&lock->cll_linkage,
-                                             &head->coh_locks);
-                               spin_unlock(&head->coh_lock_guard);
-                               CS_LOCK_INC(obj, busy);
-                       } else {
-                               spin_unlock(&head->coh_lock_guard);
-                                /*
-                                 * Other threads can acquire references to the
-                                 * top-lock through its sub-locks. Hence, it
-                                 * cannot be cl_lock_free()-ed immediately.
-                                 */
-                                cl_lock_finish(env, lock);
-                                lock = ghost;
-                        }
-                }
-        }
-        RETURN(lock);
-}
-
-/**
- * Returns existing lock matching given description. This is similar to
- * cl_lock_find() except that no new lock is created, and returned lock is
- * guaranteed to be in enum cl_lock_state::CLS_HELD state.
- */
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-                             const struct cl_lock_descr *need,
-                             const char *scope, const void *source)
-{
-        struct cl_object_header *head;
-        struct cl_object        *obj;
-        struct cl_lock          *lock;
-
-        obj  = need->cld_obj;
-        head = cl_object_header(obj);
 
-       do {
-               spin_lock(&head->coh_lock_guard);
-               lock = cl_lock_lookup(env, obj, io, need);
-               spin_unlock(&head->coh_lock_guard);
-               if (lock == NULL)
-                       return NULL;
+       /* Make sure cl_lock::cll_descr is initialized. */
+       LASSERT(obj != NULL);
 
-               cl_lock_mutex_get(env, lock);
-               if (lock->cll_state == CLS_INTRANSIT)
-                       /* Don't care return value. */
-                       cl_lock_state_wait(env, lock);
-               if (lock->cll_state == CLS_FREEING) {
-                       cl_lock_mutex_put(env, lock);
-                       cl_lock_put(env, lock);
-                       lock = NULL;
+       INIT_LIST_HEAD(&lock->cll_layers);
+       list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
+                           co_lu.lo_linkage) {
+               result = scan->co_ops->coo_lock_init(env, scan, lock, io);
+               if (result != 0) {
+                       cl_lock_fini(env, lock);
+                       break;
                }
-       } while (lock == NULL);
-
-       cl_lock_hold_add(env, lock, scope, source);
-       cl_lock_user_add(env, lock);
-       if (lock->cll_state == CLS_CACHED)
-               cl_use_try(env, lock, 1);
-       if (lock->cll_state == CLS_HELD) {
-               cl_lock_mutex_put(env, lock);
-               cl_lock_lockdep_acquire(env, lock, 0);
-               cl_lock_put(env, lock);
-       } else {
-               cl_unuse_try(env, lock);
-               cl_lock_unhold(env, lock, scope, source);
-                cl_lock_mutex_put(env, lock);
-                cl_lock_put(env, lock);
-                lock = NULL;
-        }
-
-        return lock;
+       }
+       RETURN(result);
 }
-EXPORT_SYMBOL(cl_lock_peek);
+EXPORT_SYMBOL(cl_lock_init);
 
 /**
- * Returns a slice within a lock, corresponding to the given layer in the
+ * Returns a slice with a lock, corresponding to the given layer in the
  * device stack.
  *
  * \see cl_page_at()
  */
 const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
-                                       const struct lu_device_type *dtype)
+                                      const struct lu_device_type *dtype)
 {
-        const struct cl_lock_slice *slice;
+       const struct cl_lock_slice *slice;
 
-        LINVRNT(cl_lock_invariant_trusted(NULL, lock));
-        ENTRY;
+       ENTRY;
 
        list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
-                        RETURN(slice);
-        }
-        RETURN(NULL);
+               if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
+                       RETURN(slice);
+       }
+       RETURN(NULL);
 }
 EXPORT_SYMBOL(cl_lock_at);
 
-static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
 {
-        struct cl_thread_counters *counters;
+       const struct cl_lock_slice *slice;
+       ENTRY;
 
-        counters = cl_lock_counters(env, lock);
-        lock->cll_depth++;
-        counters->ctc_nr_locks_locked++;
-        lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
-        cl_lock_trace(D_TRACE, env, "got mutex", lock);
+       cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
+       list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+               if (slice->cls_ops->clo_cancel != NULL)
+                       slice->cls_ops->clo_cancel(env, slice);
+       }
+
+       EXIT;
 }
+EXPORT_SYMBOL(cl_lock_cancel);
 
 /**
- * Locks cl_lock object.
- *
- * This is used to manipulate cl_lock fields, and to serialize state
- * transitions in the lock state machine.
- *
- * \post cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_put()
+ * Enqueue a lock.
+ * \param anchor: if we need to wait for resources before getting the lock,
+ *                use @anchor for the purpose.
+ * \retval 0  enqueue successfully
+ * \retval <0 error code
  */
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+                   struct cl_lock *lock, struct cl_sync_io *anchor)
 {
-       LINVRNT(cl_lock_invariant(env, lock));
+       const struct cl_lock_slice      *slice;
+       int                             rc = -ENOSYS;
 
-       if (lock->cll_guarder == current) {
-               LINVRNT(cl_lock_is_mutexed(lock));
-               LINVRNT(lock->cll_depth > 0);
-       } else {
-               struct cl_object_header *hdr;
-               struct cl_thread_info   *info;
-               int i;
+       ENTRY;
+
+       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+               if (slice->cls_ops->clo_enqueue == NULL)
+                       continue;
 
-               LINVRNT(lock->cll_guarder != current);
-               hdr = cl_object_header(lock->cll_descr.cld_obj);
-               /*
-                * Check that mutices are taken in the bottom-to-top order.
-                */
-               info = cl_env_info(env);
-               for (i = 0; i < hdr->coh_nesting; ++i)
-                       LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
-               mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
-               lock->cll_guarder = current;
-               LINVRNT(lock->cll_depth == 0);
+               rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
+               if (rc != 0)
+                       break;
        }
-       cl_lock_mutex_tail(env, lock);
+       RETURN(rc);
 }
-EXPORT_SYMBOL(cl_lock_mutex_get);
+EXPORT_SYMBOL(cl_lock_enqueue);
 
 /**
- * Try-locks cl_lock object.
- *
- * \retval 0 \a lock was successfully locked
- *
- * \retval -EBUSY \a lock cannot be locked right now
- *
- * \post ergo(result == 0, cl_lock_is_mutexed(lock))
- *
- * \see cl_lock_mutex_get()
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
  */
-int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+                   struct cl_lock *lock)
 {
-       int result;
-
-       LINVRNT(cl_lock_invariant_trusted(env, lock));
+       struct cl_sync_io       *anchor = NULL;
+       __u32                   enq_flags = lock->cll_descr.cld_enq_flags;
+       int                     rc;
        ENTRY;
 
-       result = 0;
-       if (lock->cll_guarder == current) {
-               LINVRNT(lock->cll_depth > 0);
-               cl_lock_mutex_tail(env, lock);
-       } else if (mutex_trylock(&lock->cll_guard)) {
-               LINVRNT(lock->cll_depth == 0);
-               lock->cll_guarder = current;
-               cl_lock_mutex_tail(env, lock);
-       } else
-               result = -EBUSY;
-       RETURN(result);
-}
-EXPORT_SYMBOL(cl_lock_mutex_try);
+       rc = cl_lock_init(env, lock, io);
+       if (rc < 0)
+               RETURN(rc);
 
-/**
- * Unlocks cl_lock object.
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_get()
- */
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
-{
-       struct cl_thread_counters *counters;
+       if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
+               anchor = &cl_env_info(env)->clt_anchor;
+               cl_sync_io_init(anchor, 1, cl_sync_io_end);
+       }
 
-       LINVRNT(cl_lock_invariant(env, lock));
-       LINVRNT(cl_lock_is_mutexed(lock));
-       LINVRNT(lock->cll_guarder == current);
-       LINVRNT(lock->cll_depth > 0);
+       rc = cl_lock_enqueue(env, io, lock, anchor);
 
-       counters = cl_lock_counters(env, lock);
-       LINVRNT(counters->ctc_nr_locks_locked > 0);
+       if (anchor != NULL) {
+               int rc2;
 
-       cl_lock_trace(D_TRACE, env, "put mutex", lock);
-       lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
-       counters->ctc_nr_locks_locked--;
-       if (--lock->cll_depth == 0) {
-               lock->cll_guarder = NULL;
-               mutex_unlock(&lock->cll_guard);
+               /* drop the reference count held at initialization time */
+               cl_sync_io_note(env, anchor, 0);
+               rc2 = cl_sync_io_wait(env, anchor, 0);
+               if (rc2 < 0 && rc == 0)
+                       rc = rc2;
        }
-}
-EXPORT_SYMBOL(cl_lock_mutex_put);
 
-/**
- * Returns true iff lock's mutex is owned by the current thread.
- */
-int cl_lock_is_mutexed(struct cl_lock *lock)
-{
-       return lock->cll_guarder == current;
+       if (rc < 0)
+               cl_lock_release(env, lock);
+       RETURN(rc);
 }
-EXPORT_SYMBOL(cl_lock_is_mutexed);
+EXPORT_SYMBOL(cl_lock_request);
 
 /**
- * Returns number of cl_lock mutices held by the current thread (environment).
+ * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
  */
-int cl_lock_nr_mutexed(const struct lu_env *env)
-{
-        struct cl_thread_info *info;
-        int i;
-        int locked;
-
-        /*
-         * NOTE: if summation across all nesting levels (currently 2) proves
-         *       too expensive, a summary counter can be added to
-         *       struct cl_thread_info.
-         */
-        info = cl_env_info(env);
-        for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
-                locked += info->clt_counters[i].ctc_nr_locks_locked;
-        return locked;
-}
-EXPORT_SYMBOL(cl_lock_nr_mutexed);
-
-static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-        ENTRY;
-        if (!(lock->cll_flags & CLF_CANCELLED)) {
-                const struct cl_lock_slice *slice;
-
-                lock->cll_flags |= CLF_CANCELLED;
-               list_for_each_entry_reverse(slice, &lock->cll_layers,
-                                           cls_linkage) {
-                        if (slice->cls_ops->clo_cancel != NULL)
-                                slice->cls_ops->clo_cancel(env, slice);
-                }
-        }
-        EXIT;
-}
-
-static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
 {
-        struct cl_object_header    *head;
-        const struct cl_lock_slice *slice;
-
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-        ENTRY;
-        if (lock->cll_state < CLS_FREEING) {
-               bool in_cache;
-
-                LASSERT(lock->cll_state != CLS_INTRANSIT);
-                cl_lock_state_set(env, lock, CLS_FREEING);
-
-                head = cl_object_header(lock->cll_descr.cld_obj);
-
-               spin_lock(&head->coh_lock_guard);
-               in_cache = !list_empty(&lock->cll_linkage);
-               if (in_cache)
-                       list_del_init(&lock->cll_linkage);
-               spin_unlock(&head->coh_lock_guard);
-
-               if (in_cache) /* coh_locks cache holds a refcount. */
-                       cl_lock_put(env, lock);
+       ENTRY;
 
-               /*
-                * From now on, no new references to this lock can be acquired
-                * by cl_lock_lookup().
-                */
-               list_for_each_entry_reverse(slice, &lock->cll_layers,
-                                           cls_linkage) {
-                       if (slice->cls_ops->clo_delete != NULL)
-                               slice->cls_ops->clo_delete(env, slice);
-               }
-               /*
-                * From now on, no new references to this lock can be acquired
-                * by layer-specific means (like a pointer from struct
-                * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
-                * lov).
-                *
-                * Lock will be finally freed in cl_lock_put() when last of
-                * existing references goes away.
-                */
-       }
+       cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
+       cl_lock_cancel(env, lock);
+       cl_lock_fini(env, lock);
        EXIT;
 }
+EXPORT_SYMBOL(cl_lock_release);
 
-/**
- * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
- * top-lock (nesting == 0) accounts for this modification in the per-thread
- * debugging counters. Sub-lock holds can be released by a thread different
- * from one that acquired it.
- */
-static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
-                             int delta)
+const char *cl_lock_mode_name(const enum cl_lock_mode mode)
 {
-        struct cl_thread_counters *counters;
-        enum clt_nesting_level     nesting;
-
-        lock->cll_holds += delta;
-        nesting = cl_lock_nesting(lock);
-        if (nesting == CNL_TOP) {
-                counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-                counters->ctc_nr_held += delta;
-                LASSERT(counters->ctc_nr_held >= 0);
-        }
+       static const char * const names[] = {
+               [CLM_READ]    = "R",
+               [CLM_WRITE]   = "W",
+               [CLM_GROUP]   = "G"
+       };
+       CLASSERT(CLM_MAX == ARRAY_SIZE(names));
+       return names[mode];
 }
 }
+EXPORT_SYMBOL(cl_lock_mode_name);
 
 /**
 
- * cl_lock_hold_mod() for the explanation of the debugging code.
+ * Prints human readable representation of a lock description.
  */
  */
-static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
-                             int delta)
-{
-        struct cl_thread_counters *counters;
-        enum clt_nesting_level     nesting;
-
-        lock->cll_users += delta;
-        nesting = cl_lock_nesting(lock);
-        if (nesting == CNL_TOP) {
-                counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-                counters->ctc_nr_used += delta;
-                LASSERT(counters->ctc_nr_used >= 0);
-        }
-}
-
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-                         const char *scope, const void *source)
+void cl_lock_descr_print(const struct lu_env *env, void *cookie,
+                        lu_printer_t printer,
+                        const struct cl_lock_descr *descr)
 {
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(lock->cll_holds > 0);
+       const struct lu_fid  *fid;
 
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
-        lu_ref_del(&lock->cll_holders, scope, source);
-        cl_lock_hold_mod(env, lock, -1);
-        if (lock->cll_holds == 0) {
-               CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
-               if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
-                   lock->cll_descr.cld_mode == CLM_GROUP ||
-                   lock->cll_state != CLS_CACHED)
-                        /*
-                         * If lock is still phantom or grouplock when user is
-                         * done with it---destroy the lock.
-                         */
-                        lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
-                if (lock->cll_flags & CLF_CANCELPEND) {
-                        lock->cll_flags &= ~CLF_CANCELPEND;
-                        cl_lock_cancel0(env, lock);
-                }
-                if (lock->cll_flags & CLF_DOOMED) {
-                        /* no longer doomed: it's dead... Jim. */
-                        lock->cll_flags &= ~CLF_DOOMED;
-                        cl_lock_delete0(env, lock);
-                }
-        }
-        EXIT;
+       fid = lu_object_fid(&descr->cld_obj->co_lu);
+       (*printer)(env, cookie, DDESCR"@"DFID, PDESCR(descr), PFID(fid));
 }
-EXPORT_SYMBOL(cl_lock_hold_release);
+EXPORT_SYMBOL(cl_lock_descr_print);
 
 /**
- * Waits until lock state is changed.
- *
- * This function is called with cl_lock mutex locked, atomically releases
- * mutex and goes to sleep, waiting for a lock state change (signaled by
- * cl_lock_signal()), and re-acquires the mutex before return.
- *
- * This function is used to wait until lock state machine makes some progress
- * and to emulate synchronous operations on top of asynchronous lock
- * interface.
- *
- * \retval -EINTR wait was interrupted
- *
- * \retval 0 wait wasn't interrupted
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_signal()
+ * Prints human readable representation of \a lock to the \a f.
  */
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-       wait_queue_t waiter;
-       sigset_t blocked;
-       int result;
-
-       ENTRY;
-       LINVRNT(cl_lock_is_mutexed(lock));
-       LINVRNT(cl_lock_invariant(env, lock));
-       LASSERT(lock->cll_depth == 1);
-       LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
-
-       cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
-       result = lock->cll_error;
-       if (result == 0) {
-               /* To avoid being interrupted by the 'non-fatal' signals
-                * (SIGCHLD, for instance), we'd block them temporarily.
-                * LU-305 */
-               blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
-
-               init_waitqueue_entry_current(&waiter);
-               add_wait_queue(&lock->cll_wq, &waiter);
-               set_current_state(TASK_INTERRUPTIBLE);
-               cl_lock_mutex_put(env, lock);
-
-               LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-               /* Returning ERESTARTSYS instead of EINTR so syscalls
-                * can be restarted if signals are pending here */
-               result = -ERESTARTSYS;
-               if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
-                       waitq_wait(&waiter, TASK_INTERRUPTIBLE);
-                       if (!cfs_signal_pending())
-                               result = 0;
-               }
-
-               cl_lock_mutex_get(env, lock);
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&lock->cll_wq, &waiter);
-
-               /* Restore old blocked signals */
-               cfs_restore_sigs(blocked);
-       }
-       RETURN(result);
-}
-EXPORT_SYMBOL(cl_lock_state_wait);
-
-static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
-                                enum cl_lock_state state)
+void cl_lock_print(const struct lu_env *env, void *cookie,
+                  lu_printer_t printer, const struct cl_lock *lock)
 {
        const struct cl_lock_slice *slice;
 
-       ENTRY;
-       LINVRNT(cl_lock_is_mutexed(lock));
-       LINVRNT(cl_lock_invariant(env, lock));
-
-       list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
-               if (slice->cls_ops->clo_state != NULL)
-                       slice->cls_ops->clo_state(env, slice, state);
-       wake_up_all(&lock->cll_wq);
-       EXIT;
-}
+       (*printer)(env, cookie, "lock@%p", lock);
+       cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
+       (*printer)(env, cookie, " {\n");
 
-/**
- * Notifies waiters that lock state changed.
- *
- * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
- * layers about state change by calling cl_lock_operations::clo_state()
- * top-to-bottom.
- */
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
-{
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
-        cl_lock_state_signal(env, lock, lock->cll_state);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_signal);
-
-/**
- * Changes lock state.
- *
- * This function is invoked to notify layers that lock state changed, possible
- * as a result of an asynchronous event such as call-back reception.
- *
- * \post lock->cll_state == state
- *
- * \see cl_lock_operations::clo_state()
- */
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
-                       enum cl_lock_state state)
-{
-        ENTRY;
-        LASSERT(lock->cll_state <= state ||
-                (lock->cll_state == CLS_CACHED &&
-                 (state == CLS_HELD || /* lock found in cache */
-                  state == CLS_NEW  ||   /* sub-lock canceled */
-                  state == CLS_INTRANSIT)) ||
-                /* lock is in transit state */
-                lock->cll_state == CLS_INTRANSIT);
-
-        if (lock->cll_state != state) {
-               CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
-               CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
-
-                cl_lock_state_signal(env, lock, state);
-                lock->cll_state = state;
-        }
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_state_set);
-
-static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
-{
-       const struct cl_lock_slice *slice;
-       int result;
-
-       do {
-               result = 0;
-
-               LINVRNT(cl_lock_is_mutexed(lock));
-               LINVRNT(cl_lock_invariant(env, lock));
-               LASSERT(lock->cll_state == CLS_INTRANSIT);
-
-               result = -ENOSYS;
-               list_for_each_entry_reverse(slice, &lock->cll_layers,
-                                           cls_linkage) {
-                       if (slice->cls_ops->clo_unuse != NULL) {
-                               result = slice->cls_ops->clo_unuse(env, slice);
-                               if (result != 0)
-                                       break;
-                       }
-               }
-               LASSERT(result != -ENOSYS);
-       } while (result == CLO_REPEAT);
-
-       return result;
-}
-
-/**
- * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
- * cl_lock_operations::clo_use() top-to-bottom to notify layers.
- * @atomic = 1, it must unuse the lock to recovery the lock to keep the
- *  use process atomic
- */
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
-{
-        const struct cl_lock_slice *slice;
-        int result;
-        enum cl_lock_state state;
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
-
-        LASSERT(lock->cll_state == CLS_CACHED);
-        if (lock->cll_error)
-                RETURN(lock->cll_error);
-
-        result = -ENOSYS;
-        state = cl_lock_intransit(env, lock);
-       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_ops->clo_use != NULL) {
-                        result = slice->cls_ops->clo_use(env, slice);
-                        if (result != 0)
-                                break;
-                }
-        }
-        LASSERT(result != -ENOSYS);
-
-        LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
-                 lock->cll_state);
-
-        if (result == 0) {
-                state = CLS_HELD;
-        } else {
-                if (result == -ESTALE) {
-                        /*
-                         * ESTALE means sublock being cancelled
-                         * at this time, and set lock state to
-                         * be NEW here and ask the caller to repeat.
-                         */
-                        state = CLS_NEW;
-                        result = CLO_REPEAT;
-                }
-
-                /* @atomic means back-off-on-failure. */
-                if (atomic) {
-                        int rc;
-                        rc = cl_unuse_try_internal(env, lock);
-                        /* Vet the results. */
-                        if (rc < 0 && result > 0)
-                                result = rc;
-                }
-
-        }
-        cl_lock_extransit(env, lock, state);
-        RETURN(result);
-}
-EXPORT_SYMBOL(cl_use_try);
-
-/**
- * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
- * top-to-bottom.
- */
-static int cl_enqueue_kick(const struct lu_env *env,
-                           struct cl_lock *lock,
-                           struct cl_io *io, __u32 flags)
-{
-        int result;
-        const struct cl_lock_slice *slice;
-
-        ENTRY;
-        result = -ENOSYS;
-       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_ops->clo_enqueue != NULL) {
-                        result = slice->cls_ops->clo_enqueue(env,
-                                                             slice, io, flags);
-                        if (result != 0)
-                                break;
-                }
-        }
-        LASSERT(result != -ENOSYS);
-        RETURN(result);
-}
-
-/**
- * Tries to enqueue a lock.
- *
- * This function is called repeatedly by cl_enqueue() until either lock is
- * enqueued, or error occurs. This function does not block waiting for
- * networking communication to complete.
- *
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *                         lock->cll_state == CLS_HELD)
- *
- * \see cl_enqueue() cl_lock_operations::clo_enqueue()
- * \see cl_lock_state::CLS_ENQUEUED
- */
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-                   struct cl_io *io, __u32 flags)
-{
-        int result;
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
-        do {
-                LINVRNT(cl_lock_is_mutexed(lock));
-
-               result = lock->cll_error;
-               if (result != 0)
-                        break;
-
-                switch (lock->cll_state) {
-                case CLS_NEW:
-                        cl_lock_state_set(env, lock, CLS_QUEUING);
-                        /* fall-through */
-                case CLS_QUEUING:
-                        /* kick layers. */
-                        result = cl_enqueue_kick(env, lock, io, flags);
-                       /* For AGL case, the cl_lock::cll_state may
-                        * become CLS_HELD already. */
-                       if (result == 0 && lock->cll_state == CLS_QUEUING)
-                                cl_lock_state_set(env, lock, CLS_ENQUEUED);
-                        break;
-                case CLS_INTRANSIT:
-                        LASSERT(cl_lock_is_intransit(lock));
-                        result = CLO_WAIT;
-                        break;
-                case CLS_CACHED:
-                        /* yank lock from the cache. */
-                        result = cl_use_try(env, lock, 0);
-                        break;
-                case CLS_ENQUEUED:
-                case CLS_HELD:
-                        result = 0;
-                        break;
-                default:
-                case CLS_FREEING:
-                        /*
-                         * impossible, only held locks with increased
-                         * ->cll_holds can be enqueued, and they cannot be
-                         * freed.
-                         */
-                        LBUG();
-                }
-        } while (result == CLO_REPEAT);
-       RETURN(result);
-}
-EXPORT_SYMBOL(cl_enqueue_try);
-
-/**
- * Cancel the conflicting lock found during previous enqueue.
- *
- * \retval 0 conflicting lock has been canceled.
- * \retval -ve error code.
- */
-int cl_lock_enqueue_wait(const struct lu_env *env,
-                         struct cl_lock *lock,
-                         int keep_mutex)
-{
-        struct cl_lock  *conflict;
-        int              rc = 0;
-        ENTRY;
-
-        LASSERT(cl_lock_is_mutexed(lock));
-        LASSERT(lock->cll_state == CLS_QUEUING);
-        LASSERT(lock->cll_conflict != NULL);
-
-        conflict = lock->cll_conflict;
-        lock->cll_conflict = NULL;
-
-        cl_lock_mutex_put(env, lock);
-        LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-        cl_lock_mutex_get(env, conflict);
-       cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
-        cl_lock_cancel(env, conflict);
-        cl_lock_delete(env, conflict);
-
-        while (conflict->cll_state != CLS_FREEING) {
-                rc = cl_lock_state_wait(env, conflict);
-                if (rc != 0)
-                        break;
-        }
-        cl_lock_mutex_put(env, conflict);
-        lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
-        cl_lock_put(env, conflict);
-
-        if (keep_mutex)
-                cl_lock_mutex_get(env, lock);
-
-        LASSERT(rc <= 0);
-        RETURN(rc);
-}
-EXPORT_SYMBOL(cl_lock_enqueue_wait);
-
-static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
-                             struct cl_io *io, __u32 enqflags)
-{
-        int result;
-
-        ENTRY;
-
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(lock->cll_holds > 0);
-
-        cl_lock_user_add(env, lock);
-        do {
-                result = cl_enqueue_try(env, lock, io, enqflags);
-                if (result == CLO_WAIT) {
-                        if (lock->cll_conflict != NULL)
-                                result = cl_lock_enqueue_wait(env, lock, 1);
-                        else
-                                result = cl_lock_state_wait(env, lock);
-                        if (result == 0)
-                                continue;
-                }
-                break;
-        } while (1);
-       if (result != 0)
-               cl_unuse_try(env, lock);
-        LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
-                     lock->cll_state == CLS_ENQUEUED ||
-                     lock->cll_state == CLS_HELD));
-        RETURN(result);
-}
-
-/**
- * Enqueues a lock.
- *
- * \pre current thread or io owns a hold on lock.
- *
- * \post ergo(result == 0, lock->users increased)
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *                         lock->cll_state == CLS_HELD)
- */
-int cl_enqueue(const struct lu_env *env, struct cl_lock *lock,
-               struct cl_io *io, __u32 enqflags)
-{
-        int result;
-
-        ENTRY;
-
-        cl_lock_lockdep_acquire(env, lock, enqflags);
-        cl_lock_mutex_get(env, lock);
-        result = cl_enqueue_locked(env, lock, io, enqflags);
-        cl_lock_mutex_put(env, lock);
-        if (result != 0)
-                cl_lock_lockdep_release(env, lock);
-        LASSERT(ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
-                     lock->cll_state == CLS_HELD));
-        RETURN(result);
-}
-EXPORT_SYMBOL(cl_enqueue);
-
-/**
- * Tries to unlock a lock.
- *
- * This function is called to release underlying resource:
- * 1. for top lock, the resource is sublocks it held;
- * 2. for sublock, the resource is the reference to dlmlock.
- *
- * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see cl_unuse() cl_lock_operations::clo_unuse()
- * \see cl_lock_state::CLS_CACHED
- */
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
-{
-        int                         result;
-        enum cl_lock_state          state = CLS_NEW;
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
-
-        if (lock->cll_users > 1) {
-                cl_lock_user_del(env, lock);
-                RETURN(0);
-        }
-
-       /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
-        * underlying resources. */
-       if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
-               cl_lock_user_del(env, lock);
-               RETURN(0);
-       }
-
-        /*
-         * New lock users (->cll_users) are not protecting unlocking
-         * from proceeding. From this point, lock eventually reaches
-         * CLS_CACHED, is reinitialized to CLS_NEW or fails into
-         * CLS_FREEING.
-         */
-        state = cl_lock_intransit(env, lock);
-
-        result = cl_unuse_try_internal(env, lock);
-        LASSERT(lock->cll_state == CLS_INTRANSIT);
-        LASSERT(result != CLO_WAIT);
-        cl_lock_user_del(env, lock);
-        if (result == 0 || result == -ESTALE) {
-                /*
-                 * Return lock back to the cache. This is the only
-                 * place where lock is moved into CLS_CACHED state.
-                 *
-                 * If one of ->clo_unuse() methods returned -ESTALE, lock
-                 * cannot be placed into cache and has to be
-                 * re-initialized. This happens e.g., when a sub-lock was
-                 * canceled while unlocking was in progress.
-                 */
-                if (state == CLS_HELD && result == 0)
-                        state = CLS_CACHED;
-                else
-                        state = CLS_NEW;
-                cl_lock_extransit(env, lock, state);
-
-                /*
-                 * Hide -ESTALE error.
-                 * If the lock is a glimpse lock, and it has multiple
-                 * stripes. Assuming that one of its sublock returned -ENAVAIL,
-                 * and other sublocks are matched write locks. In this case,
-                 * we can't set this lock to error because otherwise some of
-                 * its sublocks may not be canceled. This causes some dirty
-                 * pages won't be written to OSTs. -jay
-                 */
-                result = 0;
-        } else {
-                CERROR("result = %d, this is unlikely!\n", result);
-               state = CLS_NEW;
-                cl_lock_extransit(env, lock, state);
-        }
-       RETURN(result ?: lock->cll_error);
-}
-EXPORT_SYMBOL(cl_unuse_try);
-
-static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
-{
-        int result;
-        ENTRY;
-
-        result = cl_unuse_try(env, lock);
-        if (result)
-                CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
-
-        EXIT;
-}
-
-/**
- * Unlocks a lock.
- */
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
-{
-        ENTRY;
-        cl_lock_mutex_get(env, lock);
-        cl_unuse_locked(env, lock);
-        cl_lock_mutex_put(env, lock);
-        cl_lock_lockdep_release(env, lock);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_unuse);
-
-/**
- * Tries to wait for a lock.
- *
- * This function is called repeatedly by cl_wait() until either lock is
- * granted, or error occurs. This function does not block waiting for network
- * communication to complete.
- *
- * \see cl_wait() cl_lock_operations::clo_wait()
- * \see cl_lock_state::CLS_HELD
- */
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
-{
-        const struct cl_lock_slice *slice;
-        int                         result;
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
-        do {
-                LINVRNT(cl_lock_is_mutexed(lock));
-                LINVRNT(cl_lock_invariant(env, lock));
-               LASSERTF(lock->cll_state == CLS_QUEUING ||
-                        lock->cll_state == CLS_ENQUEUED ||
-                        lock->cll_state == CLS_HELD ||
-                        lock->cll_state == CLS_INTRANSIT,
-                        "lock state: %d\n", lock->cll_state);
-                LASSERT(lock->cll_users > 0);
-                LASSERT(lock->cll_holds > 0);
-
-               result = lock->cll_error;
-               if (result != 0)
-                        break;
-
-                if (cl_lock_is_intransit(lock)) {
-                        result = CLO_WAIT;
-                        break;
-                }
-
-                if (lock->cll_state == CLS_HELD)
-                        /* nothing to do */
-                        break;
-
-                result = -ENOSYS;
-               list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                        if (slice->cls_ops->clo_wait != NULL) {
-                                result = slice->cls_ops->clo_wait(env, slice);
-                                if (result != 0)
-                                        break;
-                        }
-                }
-                LASSERT(result != -ENOSYS);
-                if (result == 0) {
-                        LASSERT(lock->cll_state != CLS_INTRANSIT);
-                        cl_lock_state_set(env, lock, CLS_HELD);
-                }
-        } while (result == CLO_REPEAT);
-       RETURN(result);
-}
-EXPORT_SYMBOL(cl_wait_try);
-
-/**
- * Waits until enqueued lock is granted.
- *
- * \pre current thread or io owns a hold on the lock
- * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *                        lock->cll_state == CLS_HELD)
- *
- * \post ergo(result == 0, lock->cll_state == CLS_HELD)
- */
-int cl_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-        int result;
-
-        ENTRY;
-        cl_lock_mutex_get(env, lock);
-
-        LINVRNT(cl_lock_invariant(env, lock));
-        LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
-                 "Wrong state %d \n", lock->cll_state);
-        LASSERT(lock->cll_holds > 0);
-
-        do {
-                result = cl_wait_try(env, lock);
-                if (result == CLO_WAIT) {
-                        result = cl_lock_state_wait(env, lock);
-                        if (result == 0)
-                                continue;
-                }
-                break;
-        } while (1);
-        if (result < 0) {
-               cl_unuse_try(env, lock);
-                cl_lock_lockdep_release(env, lock);
-        }
-        cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
-        cl_lock_mutex_put(env, lock);
-        LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
-        RETURN(result);
-}
-EXPORT_SYMBOL(cl_wait);
-
-/**
- * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
- * value.
- */
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
-{
-        const struct cl_lock_slice *slice;
-        unsigned long pound;
-        unsigned long ounce;
-
-        ENTRY;
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-        pound = 0;
-       list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_ops->clo_weigh != NULL) {
-                        ounce = slice->cls_ops->clo_weigh(env, slice);
-                        pound += ounce;
-                        if (pound < ounce) /* over-weight^Wflow */
-                                pound = ~0UL;
-                }
-        }
-        RETURN(pound);
-}
-EXPORT_SYMBOL(cl_lock_weigh);
-
-/**
- * Notifies layers that lock description changed.
- *
- * The server can grant client a lock different from one that was requested
- * (e.g., larger in extent). This method is called when actually granted lock
- * description becomes known to let layers to accommodate for changed lock
- * description.
- *
- * \see cl_lock_operations::clo_modify()
- */
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
-                   const struct cl_lock_descr *desc)
-{
-        const struct cl_lock_slice *slice;
-        struct cl_object           *obj = lock->cll_descr.cld_obj;
-        struct cl_object_header    *hdr = cl_object_header(obj);
-        int result;
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
-        /* don't allow object to change */
-        LASSERT(obj == desc->cld_obj);
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-       list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-                if (slice->cls_ops->clo_modify != NULL) {
-                        result = slice->cls_ops->clo_modify(env, slice, desc);
-                        if (result != 0)
-                                RETURN(result);
-                }
-        }
-        CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
-                      PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
-        /*
-         * Just replace description in place. Nothing more is needed for
-         * now. If locks were indexed according to their extent and/or mode,
-         * that index would have to be updated here.
-         */
-       spin_lock(&hdr->coh_lock_guard);
-       lock->cll_descr = *desc;
-       spin_unlock(&hdr->coh_lock_guard);
-       RETURN(0);
-}
-EXPORT_SYMBOL(cl_lock_modify);
-
-/**
- * Initializes lock closure with a given origin.
- *
- * \see cl_lock_closure
- */
-void cl_lock_closure_init(const struct lu_env *env,
-                          struct cl_lock_closure *closure,
-                          struct cl_lock *origin, int wait)
-{
-        LINVRNT(cl_lock_is_mutexed(origin));
-        LINVRNT(cl_lock_invariant(env, origin));
-
-       INIT_LIST_HEAD(&closure->clc_list);
-        closure->clc_origin = origin;
-        closure->clc_wait   = wait;
-        closure->clc_nr     = 0;
-}
-EXPORT_SYMBOL(cl_lock_closure_init);
-
-/**
- * Builds a closure of \a lock.
- *
- * Building of a closure consists of adding initial lock (\a lock) into it,
- * and calling cl_lock_operations::clo_closure() methods of \a lock. These
- * methods might call cl_lock_closure_build() recursively again, adding more
- * locks to the closure, etc.
- *
- * \see cl_lock_closure
- */
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-                          struct cl_lock_closure *closure)
-{
-        const struct cl_lock_slice *slice;
-        int result;
-
-        ENTRY;
-        LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
-        LINVRNT(cl_lock_invariant(env, closure->clc_origin));
-
-        result = cl_lock_enclosure(env, lock, closure);
-        if (result == 0) {
-               list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                        if (slice->cls_ops->clo_closure != NULL) {
-                                result = slice->cls_ops->clo_closure(env, slice,
-                                                                     closure);
-                                if (result != 0)
-                                        break;
-                        }
-                }
-        }
-        if (result != 0)
-                cl_lock_disclosure(env, closure);
-        RETURN(result);
-}
-EXPORT_SYMBOL(cl_lock_closure_build);
-
-/**
- * Adds new lock to a closure.
- *
- * Try-locks \a lock and if succeeded, adds it to the closure (never more than
- * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
- * until next try-lock is likely to succeed.
- */
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
-                      struct cl_lock_closure *closure)
-{
-        int result = 0;
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
-        if (!cl_lock_mutex_try(env, lock)) {
-                /*
-                 * If lock->cll_inclosure is not empty, lock is already in
-                 * this closure.
-                 */
-               if (list_empty(&lock->cll_inclosure)) {
-                        cl_lock_get_trust(lock);
-                        lu_ref_add(&lock->cll_reference, "closure", closure);
-                       list_add(&lock->cll_inclosure, &closure->clc_list);
-                        closure->clc_nr++;
-                } else
-                        cl_lock_mutex_put(env, lock);
-                result = 0;
-        } else {
-                cl_lock_disclosure(env, closure);
-                if (closure->clc_wait) {
-                        cl_lock_get_trust(lock);
-                        lu_ref_add(&lock->cll_reference, "closure-w", closure);
-                        cl_lock_mutex_put(env, closure->clc_origin);
-
-                        LASSERT(cl_lock_nr_mutexed(env) == 0);
-                        cl_lock_mutex_get(env, lock);
-                        cl_lock_mutex_put(env, lock);
-
-                        cl_lock_mutex_get(env, closure->clc_origin);
-                        lu_ref_del(&lock->cll_reference, "closure-w", closure);
-                        cl_lock_put(env, lock);
-                }
-                result = CLO_REPEAT;
-        }
-        RETURN(result);
-}
-EXPORT_SYMBOL(cl_lock_enclosure);
-
-/** Releases mutices of enclosed locks. */
-void cl_lock_disclosure(const struct lu_env *env,
-                        struct cl_lock_closure *closure)
-{
-       struct cl_lock *scan;
-       struct cl_lock *temp;
-
-       cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
-       list_for_each_entry_safe(scan, temp, &closure->clc_list,
-                                cll_inclosure){
-               list_del_init(&scan->cll_inclosure);
-               cl_lock_mutex_put(env, scan);
-               lu_ref_del(&scan->cll_reference, "closure", closure);
-               cl_lock_put(env, scan);
-               closure->clc_nr--;
-       }
-       LASSERT(closure->clc_nr == 0);
-}
-EXPORT_SYMBOL(cl_lock_disclosure);
-
-/** Finalizes a closure. */
-void cl_lock_closure_fini(struct cl_lock_closure *closure)
-{
-        LASSERT(closure->clc_nr == 0);
-       LASSERT(list_empty(&closure->clc_list));
-}
-EXPORT_SYMBOL(cl_lock_closure_fini);
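Usage sketch, not part of the patch: the closure helpers removed above were how a caller try-locked a set of related cl_locks. This is a minimal reconstruction from the signatures and comments in this hunk; "origin" and "other_lock" are hypothetical, and the caller is assumed to already hold origin's mutex.

	struct cl_lock_closure closure;
	int rc;

	/* wait == 1: cl_lock_enclosure() may sleep before returning CLO_REPEAT
	 * when a try-lock fails */
	cl_lock_closure_init(env, &closure, origin, 1);
	rc = cl_lock_closure_build(env, other_lock, &closure);
	if (rc == 0) {
		/* mutexes of all enclosed locks are held here */
		cl_lock_disclosure(env, &closure);
	}
	/* on failure cl_lock_closure_build() has already disclosed the closure */
	cl_lock_closure_fini(&closure);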
-
-/**
- * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
- * destroyed, then destroy the lock. If there are holds on the lock, postpone
- * destruction until all holds are released. This is called when a decision is
- * made to destroy the lock in the future. E.g., when a blocking AST is
- * received on it, or fatal communication error happens.
- *
- * Caller must have a reference on this lock to prevent a situation, when
- * deleted lock lingers in memory for indefinite time, because nobody calls
- * cl_lock_put() to finish it.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *           cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock is deleted, mutices of no other locks can be
- *      held, as deletion of sub-locks might require releasing a top-lock
- *      mutex]
- *
- * \see cl_lock_operations::clo_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
-                     cl_lock_nr_mutexed(env) == 1));
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
-        if (lock->cll_holds == 0)
-                cl_lock_delete0(env, lock);
-        else
-                lock->cll_flags |= CLF_DOOMED;
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_delete);
-
-/**
- * Mark lock as irrecoverably failed, and mark it for destruction. This
- * happens when, e.g., server fails to grant a lock to us, or networking
- * time-out happens.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- *
- * \see clo_lock_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-        ENTRY;
-        if (lock->cll_error == 0 && error != 0) {
-               cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
-                lock->cll_error = error;
-                cl_lock_signal(env, lock);
-                cl_lock_cancel(env, lock);
-                cl_lock_delete(env, lock);
-        }
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_error);
-
-/**
- * Cancels this lock. Notifies layers
- * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
- * there are holds on the lock, postpone cancellation until
- * all holds are released.
- *
- * Cancellation notification is delivered to layers at most once.
- *
- * \see cl_lock_operations::clo_cancel()
- * \see cl_lock::cll_holds
- */
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
-        if (lock->cll_holds == 0)
-                cl_lock_cancel0(env, lock);
-        else
-                lock->cll_flags |= CLF_CANCELPEND;
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_cancel);
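Usage sketch, not part of the patch: cl_lock_cancel() and cl_lock_delete() above take effect immediately only when cll_holds is zero; otherwise they set CLF_CANCELPEND/CLF_DOOMED and the work is deferred until the holds are released. The teardown below mirrors the sequence in cl_lock_error() a few lines earlier and assumes the caller holds the lock mutex and owns a reference:

	if (rc != 0) {
		/* notify layers bottom-to-top, then schedule destruction */
		cl_lock_cancel(env, lock);
		cl_lock_delete(env, lock);
	}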
-
-/**
- * Finds an existing lock covering given index and optionally different from a
- * given \a except lock.
- */
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-                                struct cl_object *obj, pgoff_t index,
-                                struct cl_lock *except,
-                                int pending, int canceld)
-{
-        struct cl_object_header *head;
-        struct cl_lock          *scan;
-        struct cl_lock          *lock;
-        struct cl_lock_descr    *need;
-
-        ENTRY;
-
-        head = cl_object_header(obj);
-        need = &cl_env_info(env)->clt_descr;
-        lock = NULL;
-
-        need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
-                                    * not PHANTOM */
-       need->cld_start = need->cld_end = index;
-        need->cld_enq_flags = 0;
-
-       spin_lock(&head->coh_lock_guard);
-        /* It is fine to match any group lock since there could be only one
-         * with a uniq gid and it conflicts with all other lock modes too */
-       list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
-                if (scan != except &&
-                    (scan->cll_descr.cld_mode == CLM_GROUP ||
-                    cl_lock_ext_match(&scan->cll_descr, need)) &&
-                    scan->cll_state >= CLS_HELD &&
-                    scan->cll_state < CLS_FREEING &&
-                    /*
-                     * This check is racy as the lock can be canceled right
-                     * after it is done, but this is fine, because page exists
-                     * already.
-                     */
-                    (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
-                    (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
-                        /* Don't increase cs_hit here since this
-                         * is just a helper function. */
-                        cl_lock_get_trust(scan);
-                        lock = scan;
-                        break;
-                }
-        }
-       spin_unlock(&head->coh_lock_guard);
-       RETURN(lock);
-}
-EXPORT_SYMBOL(cl_lock_at_pgoff);
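Usage sketch, not part of the patch: callers of the removed cl_lock_at_pgoff() got back a lock carrying a "trust" reference that they had to drop themselves; check_and_discard_cb(), converted later in this patch, used it exactly this way before switching to osc_dlmlock_at_pgoff(). Variable names are hypothetical:

	struct cl_lock *covering;

	covering = cl_lock_at_pgoff(env, obj, index, NULL, 1 /* pending */,
				    0 /* canceld */);
	if (covering != NULL) {
		/* the page at 'index' is still protected by 'covering' */
		cl_lock_put(env, covering);
	}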
-
-/**
- * Eliminate all locks for a given object.
- *
- * Caller has to guarantee that no lock is in active use.
- *
- * \param cancel when this is set, cl_locks_prune() cancels locks before
- *               destroying.
- */
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
-{
-       struct cl_object_header *head;
-       struct cl_lock          *lock;
-
-       ENTRY;
-       head = cl_object_header(obj);
-
-       spin_lock(&head->coh_lock_guard);
-       while (!list_empty(&head->coh_locks)) {
-               lock = container_of(head->coh_locks.next,
-                                   struct cl_lock, cll_linkage);
-               cl_lock_get_trust(lock);
-               spin_unlock(&head->coh_lock_guard);
-               lu_ref_add(&lock->cll_reference, "prune", current);
-
-again:
-               cl_lock_mutex_get(env, lock);
-               if (lock->cll_state < CLS_FREEING) {
-                       LASSERT(lock->cll_users <= 1);
-                       if (unlikely(lock->cll_users == 1)) {
-                               struct l_wait_info lwi = { 0 };
-
-                               cl_lock_mutex_put(env, lock);
-                               l_wait_event(lock->cll_wq,
-                                            lock->cll_users == 0,
-                                            &lwi);
-                               goto again;
-                       }
-
-                       if (cancel)
-                               cl_lock_cancel(env, lock);
-                       cl_lock_delete(env, lock);
-               }
-               cl_lock_mutex_put(env, lock);
-               lu_ref_del(&lock->cll_reference, "prune", current);
-               cl_lock_put(env, lock);
-               spin_lock(&head->coh_lock_guard);
-       }
-       spin_unlock(&head->coh_lock_guard);
-       EXIT;
-}
-EXPORT_SYMBOL(cl_locks_prune);
-
-static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
-                                          const struct cl_io *io,
-                                          const struct cl_lock_descr *need,
-                                          const char *scope, const void *source)
-{
-        struct cl_lock *lock;
-
-        ENTRY;
-
-        while (1) {
-                lock = cl_lock_find(env, io, need);
-                if (IS_ERR(lock))
-                        break;
-                cl_lock_mutex_get(env, lock);
-                if (lock->cll_state < CLS_FREEING &&
-                    !(lock->cll_flags & CLF_CANCELLED)) {
-                        cl_lock_hold_mod(env, lock, +1);
-                        lu_ref_add(&lock->cll_holders, scope, source);
-                        lu_ref_add(&lock->cll_reference, scope, source);
-                        break;
-                }
-                cl_lock_mutex_put(env, lock);
-                cl_lock_put(env, lock);
-        }
-        RETURN(lock);
-}
-
-/**
- * Returns a lock matching \a need description with a reference and a hold on
- * it.
- *
- * This is much like cl_lock_find(), except that cl_lock_hold() additionally
- * guarantees that lock is not in the CLS_FREEING state on return.
- */
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-                             const struct cl_lock_descr *need,
-                             const char *scope, const void *source)
-{
-        struct cl_lock *lock;
-
-        ENTRY;
-
-        lock = cl_lock_hold_mutex(env, io, need, scope, source);
-        if (!IS_ERR(lock))
-                cl_lock_mutex_put(env, lock);
-        RETURN(lock);
-}
-EXPORT_SYMBOL(cl_lock_hold);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-                                const struct cl_lock_descr *need,
-                                const char *scope, const void *source)
-{
-        struct cl_lock       *lock;
-        int                   rc;
-        __u32                 enqflags = need->cld_enq_flags;
-
-        ENTRY;
-        do {
-                lock = cl_lock_hold_mutex(env, io, need, scope, source);
-                if (IS_ERR(lock))
-                        break;
-
-                rc = cl_enqueue_locked(env, lock, io, enqflags);
-                if (rc == 0) {
-                        if (cl_lock_fits_into(env, lock, need, io)) {
-                                if (!(enqflags & CEF_AGL)) {
-                                        cl_lock_mutex_put(env, lock);
-                                        cl_lock_lockdep_acquire(env, lock,
-                                                                enqflags);
-                                        break;
-                                }
-                                rc = 1;
-                        }
-                        cl_unuse_locked(env, lock);
-                }
-                cl_lock_trace(D_DLMTRACE, env,
-                              rc <= 0 ? "enqueue failed" : "agl succeed", lock);
-                cl_lock_hold_release(env, lock, scope, source);
-                cl_lock_mutex_put(env, lock);
-                lu_ref_del(&lock->cll_reference, scope, source);
-                cl_lock_put(env, lock);
-                if (rc > 0) {
-                        LASSERT(enqflags & CEF_AGL);
-                        lock = NULL;
-                } else if (rc != 0) {
-                        lock = ERR_PTR(rc);
-                }
-        } while (rc == 0);
-        RETURN(lock);
-}
-EXPORT_SYMBOL(cl_lock_request);
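Usage sketch, not part of the patch, for contrast with the simplified interface this patch introduces (see the echo client hunk further down): the old cl_lock_request() looked up or enqueued a cached lock and returned it referenced and held, tagged with a scope string and source pointer that had to be repeated on release. Variable names are hypothetical:

	struct cl_lock *lock;

	lock = cl_lock_request(env, io, need, "my scope", caller);
	if (IS_ERR(lock))
		return PTR_ERR(lock);
	if (lock != NULL) {		/* CEF_AGL requests may return NULL */
		/* ... I/O runs under the lock ... */
		cl_lock_release(env, lock, "my scope", caller);
	}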
-
-/**
- * Adds a hold to a known lock.
- */
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
-                      const char *scope, const void *source)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-        LASSERT(lock->cll_state != CLS_FREEING);
-
-        ENTRY;
-        cl_lock_get(lock);
-       cl_lock_hold_mod(env, lock, +1);
-        lu_ref_add(&lock->cll_holders, scope, source);
-        lu_ref_add(&lock->cll_reference, scope, source);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_hold_add);
-
-/**
- * Releases a hold and a reference on a lock, on which caller acquired a
- * mutex.
- */
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
-                    const char *scope, const void *source)
-{
-        LINVRNT(cl_lock_invariant(env, lock));
-        ENTRY;
-        cl_lock_hold_release(env, lock, scope, source);
-        lu_ref_del(&lock->cll_reference, scope, source);
-        cl_lock_put(env, lock);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_unhold);
-
-/**
- * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
- */
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
-                     const char *scope, const void *source)
-{
-        LINVRNT(cl_lock_invariant(env, lock));
-        ENTRY;
-        cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
-        cl_lock_mutex_get(env, lock);
-        cl_lock_hold_release(env, lock, scope, source);
-        cl_lock_mutex_put(env, lock);
-        lu_ref_del(&lock->cll_reference, scope, source);
-        cl_lock_put(env, lock);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_release);
-
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
-{
-        LINVRNT(cl_lock_is_mutexed(lock));
-        LINVRNT(cl_lock_invariant(env, lock));
-
-        ENTRY;
-        cl_lock_used_mod(env, lock, +1);
-        EXIT;
-}
-EXPORT_SYMBOL(cl_lock_user_add);
-
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
-{
-       LINVRNT(cl_lock_is_mutexed(lock));
-       LINVRNT(cl_lock_invariant(env, lock));
-       LASSERT(lock->cll_users > 0);
-
-       ENTRY;
-       cl_lock_used_mod(env, lock, -1);
-       if (lock->cll_users == 0)
-               wake_up_all(&lock->cll_wq);
-       EXIT;
-}
-EXPORT_SYMBOL(cl_lock_user_del);
-
-const char *cl_lock_mode_name(const enum cl_lock_mode mode)
-{
-        static const char *names[] = {
-                [CLM_PHANTOM] = "P",
-                [CLM_READ]    = "R",
-                [CLM_WRITE]   = "W",
-                [CLM_GROUP]   = "G"
-        };
-       CLASSERT(CLM_MAX == ARRAY_SIZE(names));
-       return names[mode];
-}
-EXPORT_SYMBOL(cl_lock_mode_name);
-
-/**
- * Prints human readable representation of a lock description.
- */
-void cl_lock_descr_print(const struct lu_env *env, void *cookie,
-                       lu_printer_t printer,
-                       const struct cl_lock_descr *descr)
-{
-        const struct lu_fid  *fid;
-
-        fid = lu_object_fid(&descr->cld_obj->co_lu);
-        (*printer)(env, cookie, DDESCR"@"DFID, PDESCR(descr), PFID(fid));
-}
-EXPORT_SYMBOL(cl_lock_descr_print);
-
-/**
- * Prints human readable representation of \a lock to the \a f.
- */
-void cl_lock_print(const struct lu_env *env, void *cookie,
-                   lu_printer_t printer, const struct cl_lock *lock)
-{
-        const struct cl_lock_slice *slice;
-        (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
-                  lock, atomic_read(&lock->cll_ref),
-                   lock->cll_state, lock->cll_error, lock->cll_holds,
-                   lock->cll_users, lock->cll_flags);
-        cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
-        (*printer)(env, cookie, " {\n");
-
-       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-                (*printer)(env, cookie, "    %s@%p: ",
-                           slice->cls_obj->co_lu.lo_dev->ld_type->ldt_name,
-                           slice);
-                if (slice->cls_ops->clo_print != NULL)
-                        slice->cls_ops->clo_print(env, cookie, printer, slice);
-                (*printer)(env, cookie, "\n");
-        }
-        (*printer)(env, cookie, "} lock@%p\n", lock);
+       list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+               (*printer)(env, cookie, "    %s@%p: ",
+                          slice->cls_obj->co_lu.lo_dev->ld_type->ldt_name,
+                          slice);
+               if (slice->cls_ops->clo_print != NULL)
+                       slice->cls_ops->clo_print(env, cookie, printer, slice);
+               (*printer)(env, cookie, "\n");
+       }
+       (*printer)(env, cookie, "} lock@%p\n", lock);
 }
 EXPORT_SYMBOL(cl_lock_print);
-
-int cl_lock_init(void)
-{
-        return lu_kmem_init(cl_lock_caches);
-}
-
-void cl_lock_fini(void)
-{
-        lu_kmem_fini(cl_lock_caches);
-}
index d94ef6d..de52f90 100644 (file)
@@ -44,7 +44,6 @@
  *
  *  i_mutex
  *      PG_locked
- *          ->coh_lock_guard
  *          ->coh_attr_guard
  *          ->ls_guard
  */
@@ -63,8 +62,6 @@
 
 static struct kmem_cache *cl_env_kmem;
 
-/** Lock class of cl_object_header::coh_lock_guard */
-static struct lock_class_key cl_lock_guard_class;
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
 
@@ -80,11 +77,8 @@ int cl_object_header_init(struct cl_object_header *h)
        ENTRY;
        result = lu_object_header_init(&h->coh_lu);
        if (result == 0) {
-               spin_lock_init(&h->coh_lock_guard);
                spin_lock_init(&h->coh_attr_guard);
-               lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
                lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
-               INIT_LIST_HEAD(&h->coh_locks);
                h->coh_page_bufsize = 0;
        }
        RETURN(result);
@@ -96,7 +90,6 @@ EXPORT_SYMBOL(cl_object_header_init);
  */
 void cl_object_header_fini(struct cl_object_header *h)
 {
-       LASSERT(list_empty(&h->coh_locks));
         lu_object_header_fini(&h->coh_lu);
 }
 EXPORT_SYMBOL(cl_object_header_fini);
@@ -326,7 +319,7 @@ EXPORT_SYMBOL(cl_conf_set);
 /**
  * Prunes caches of pages and locks for this object.
  */
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
 {
        struct lu_object_header *top;
        struct cl_object *o;
@@ -343,10 +336,7 @@ void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
                }
        }
 
-       /* TODO: pruning locks will be moved into layers after cl_lock
-        * simplification is done */
-       cl_locks_prune(env, obj, 1);
-       EXIT;
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_object_prune);
 
@@ -359,38 +349,12 @@ EXPORT_SYMBOL(cl_object_prune);
  */
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
 {
-        struct cl_object_header *hdr;
-
-        hdr = cl_object_header(obj);
+       struct cl_object_header *hdr = cl_object_header(obj);
 
        set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
-        /*
-         * Destroy all locks. Object destruction (including cl_inode_fini())
-         * cannot cancel the locks, because in the case of a local client,
-         * where client and server share the same thread running
-         * prune_icache(), this can dead-lock with ldlm_cancel_handler()
-         * waiting on __wait_on_freeing_inode().
-         */
-        cl_locks_prune(env, obj, 0);
 }
 EXPORT_SYMBOL(cl_object_kill);
 
-/**
- * Check if the object has locks.
- */
-int cl_object_has_locks(struct cl_object *obj)
-{
-       struct cl_object_header *head = cl_object_header(obj);
-       int has;
-
-       spin_lock(&head->coh_lock_guard);
-       has = list_empty(&head->coh_locks);
-       spin_unlock(&head->coh_lock_guard);
-
-       return (has == 0);
-}
-EXPORT_SYMBOL(cl_object_has_locks);
-
 void cache_stats_init(struct cache_stats *cs, const char *name)
 {
        int i;
@@ -439,11 +403,8 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
         result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
         if (result == 0) {
                 cache_stats_init(&s->cs_pages, "pages");
-                cache_stats_init(&s->cs_locks, "locks");
                 for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
                        atomic_set(&s->cs_pages_state[0], 0);
-                for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
-                       atomic_set(&s->cs_locks_state[i], 0);
                cl_env_percpu_refill();
        }
        return result;
@@ -477,15 +438,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
                [CPS_PAGEIN]    = "r",
                [CPS_FREEING]   = "f"
        };
-       static const char *lstate[] = {
-               [CLS_NEW]       = "n",
-               [CLS_QUEUING]   = "q",
-               [CLS_ENQUEUED]  = "e",
-               [CLS_HELD]      = "h",
-               [CLS_INTRANSIT] = "t",
-               [CLS_CACHED]    = "c",
-               [CLS_FREEING]   = "f"
-       };
        int i;
 
 /*
@@ -501,12 +453,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
                seq_printf(m, "%s: %u ", pstate[i],
                           atomic_read(&site->cs_pages_state[i]));
        seq_printf(m, "]\n");
-       cache_stats_print(&site->cs_locks, m, 0);
-       seq_printf(m, " [");
-       for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
-               seq_printf(m, "%s: %u ", lstate[i],
-                          atomic_read(&site->cs_locks_state[i]));
-       seq_printf(m, "]\n");
        cache_stats_print(&cl_env_stats, m, 0);
        seq_printf(m, "\n");
        return 0;
@@ -1255,12 +1201,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
 }
 EXPORT_SYMBOL(cl_stack_fini);
 
-int  cl_lock_init(void);
-void cl_lock_fini(void);
-
-int  cl_page_init(void);
-void cl_page_fini(void);
-
 static struct lu_context_key cl_key;
 
 struct cl_thread_info *cl_env_info(const struct lu_env *env)
@@ -1357,22 +1297,13 @@ int cl_global_init(void)
         if (result)
                 goto out_kmem;
 
-        result = cl_lock_init();
-        if (result)
-                goto out_context;
-
-        result = cl_page_init();
-        if (result)
-                goto out_lock;
-
        result = cl_env_percpu_init();
        if (result)
                /* no cl_env_percpu_fini on error */
-               goto out_lock;
+               goto out_context;
 
         return 0;
-out_lock:
-        cl_lock_fini();
+
 out_context:
         lu_context_key_degister(&cl_key);
 out_kmem:
@@ -1388,8 +1319,6 @@ out_store:
 void cl_global_fini(void)
 {
        cl_env_percpu_fini();
-        cl_lock_fini();
-        cl_page_fini();
         lu_context_key_degister(&cl_key);
         lu_kmem_fini(cl_object_caches);
         cl_env_store_fini();
index d877717..625d8cb 100644 (file)
@@ -1154,12 +1154,3 @@ void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
        EXIT;
 }
 EXPORT_SYMBOL(cl_page_slice_add);
-
-int  cl_page_init(void)
-{
-        return 0;
-}
-
-void cl_page_fini(void)
-{
-}
index f7ddd24..8bdc8f6 100644 (file)
@@ -176,7 +176,7 @@ struct echo_thread_info {
 
        struct cl_2queue        eti_queue;
        struct cl_io            eti_io;
-       struct cl_lock_descr    eti_descr;
+       struct cl_lock          eti_lock;
        struct lu_fid           eti_fid;
        struct lu_fid           eti_fid2;
 #ifdef HAVE_SERVER_SUPPORT
@@ -347,26 +347,8 @@ static void echo_lock_fini(const struct lu_env *env,
         OBD_SLAB_FREE_PTR(ecl, echo_lock_kmem);
 }
 
-static void echo_lock_delete(const struct lu_env *env,
-                             const struct cl_lock_slice *slice)
-{
-        struct echo_lock *ecl      = cl2echo_lock(slice);
-
-       LASSERT(list_empty(&ecl->el_chain));
-}
-
-static int echo_lock_fits_into(const struct lu_env *env,
-                               const struct cl_lock_slice *slice,
-                               const struct cl_lock_descr *need,
-                               const struct cl_io *unused)
-{
-        return 1;
-}
-
 static struct cl_lock_operations echo_lock_ops = {
         .clo_fini      = echo_lock_fini,
-        .clo_delete    = echo_lock_delete,
-        .clo_fits_into = echo_lock_fits_into
 };
 
 /** @} echo_lock */
@@ -946,18 +928,9 @@ static void echo_lock_release(const struct lu_env *env,
                               struct echo_lock *ecl,
                               int still_used)
 {
-        struct cl_lock *clk = echo_lock2cl(ecl);
+       struct cl_lock *clk = echo_lock2cl(ecl);
 
-        cl_lock_get(clk);
-        cl_unuse(env, clk);
-        cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
-        if (!still_used) {
-                cl_lock_mutex_get(env, clk);
-                cl_lock_cancel(env, clk);
-                cl_lock_delete(env, clk);
-                cl_lock_mutex_put(env, clk);
-        }
-        cl_lock_put(env, clk);
+       cl_lock_release(env, clk);
 }
 
 static struct lu_device *echo_device_free(const struct lu_env *env,
@@ -1157,9 +1130,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
 
         info = echo_env_info(env);
         io = &info->eti_io;
-        descr = &info->eti_descr;
-        obj = echo_obj2cl(eco);
+       lck = &info->eti_lock;
+       obj = echo_obj2cl(eco);
 
+       memset(lck, 0, sizeof(*lck));
+       descr = &lck->cll_descr;
         descr->cld_obj   = obj;
         descr->cld_start = cl_index(obj, start);
         descr->cld_end   = cl_index(obj, end);
@@ -1167,25 +1142,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
         descr->cld_enq_flags = enqflags;
         io->ci_obj = obj;
 
-        lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
-        if (lck) {
-                struct echo_client_obd *ec = eco->eo_dev->ed_ec;
-                struct echo_lock *el;
+       rc = cl_lock_request(env, io, lck);
+       if (rc == 0) {
+               struct echo_client_obd *ec = eco->eo_dev->ed_ec;
+               struct echo_lock *el;
 
 
-                rc = cl_wait(env, lck);
-                if (rc == 0) {
-                        el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
-                       spin_lock(&ec->ec_lock);
-                       if (list_empty(&el->el_chain)) {
-                               list_add(&el->el_chain, &ec->ec_locks);
-                               el->el_cookie = ++ec->ec_unique;
-                       }
-                       atomic_inc(&el->el_refcount);
-                       *cookie = el->el_cookie;
-                       spin_unlock(&ec->ec_lock);
-               } else {
-                       cl_lock_release(env, lck, "ec enqueue", current);
+               el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
+               spin_lock(&ec->ec_lock);
+               if (list_empty(&el->el_chain)) {
+                       list_add(&el->el_chain, &ec->ec_locks);
+                       el->el_cookie = ++ec->ec_unique;
                }
+               atomic_inc(&el->el_refcount);
+               *cookie = el->el_cookie;
+               spin_unlock(&ec->ec_lock);
        }
        RETURN(rc);
 }
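Usage sketch, not part of the patch: the hunk above shows the new cacheless convention in full. The cl_lock is a zeroed, caller-owned container describing the extent to lock, cl_lock_request() now returns an error code rather than a pointer, and cl_lock_release() (see echo_lock_release() earlier) takes no scope/source tags. Error handling is elided; the mode and enqueue flags are placeholders for whatever the caller needs:

	struct cl_lock *lck = &info->eti_lock;	/* caller-owned, not cached */
	int rc;

	memset(lck, 0, sizeof(*lck));
	lck->cll_descr.cld_obj       = obj;
	lck->cll_descr.cld_start     = cl_index(obj, start);
	lck->cll_descr.cld_end       = cl_index(obj, end);
	lck->cll_descr.cld_mode      = CLM_WRITE;	/* assumed mode */
	lck->cll_descr.cld_enq_flags = enqflags;

	rc = cl_lock_request(env, io, lck);		/* enqueue at each layer */
	if (rc == 0) {
		/* ... use the granted lock ... */
		cl_lock_release(env, lck);		/* drop it when done */
	}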
index a1f5f8a..c55c4c2 100644 (file)
@@ -76,6 +76,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags)
        *buf++ = ext->oe_rw ? 'r' : 'w';
        if (ext->oe_intree)
                *buf++ = 'i';
+       if (ext->oe_sync)
+               *buf++ = 'S';
        if (ext->oe_srvlock)
                *buf++ = 's';
        if (ext->oe_hp)
@@ -102,28 +104,32 @@ static inline char list_empty_marker(struct list_head *list)
 static const char *oes_strings[] = {
        "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL };
 
-#define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do {                    \
-       struct osc_extent *__ext = (extent);                            \
-       char __buf[16];                                                 \
-                                                                       \
-       CDEBUG(lvl,                                                     \
-               "extent %p@{" EXTSTR ", "                               \
-               "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt,    \
-               /* ----- extent part 0 ----- */                         \
-               __ext, EXTPARA(__ext),                                  \
-               /* ----- part 1 ----- */                                \
-               atomic_read(&__ext->oe_refc),                           \
-               atomic_read(&__ext->oe_users),                          \
-               list_empty_marker(&__ext->oe_link),                     \
-               oes_strings[__ext->oe_state], ext_flags(__ext, __buf),  \
-               __ext->oe_obj,                                          \
-               /* ----- part 2 ----- */                                \
-               __ext->oe_grants, __ext->oe_nr_pages,                   \
-               list_empty_marker(&__ext->oe_pages),                    \
-               waitqueue_active(&__ext->oe_waitq) ? '+' : '-',         \
-               __ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner,     \
-               /* ----- part 4 ----- */                                \
-               ## __VA_ARGS__);                                        \
+#define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do {                          \
+       struct osc_extent *__ext = (extent);                                  \
+       char __buf[16];                                                       \
+                                                                             \
+       CDEBUG(lvl,                                                           \
+               "extent %p@{" EXTSTR ", "                                     \
+               "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt,          \
+               /* ----- extent part 0 ----- */                               \
+               __ext, EXTPARA(__ext),                                        \
+               /* ----- part 1 ----- */                                      \
+               atomic_read(&__ext->oe_refc),                         \
+               atomic_read(&__ext->oe_users),                        \
+               list_empty_marker(&__ext->oe_link),                           \
+               oes_strings[__ext->oe_state], ext_flags(__ext, __buf),        \
+               __ext->oe_obj,                                                \
+               /* ----- part 2 ----- */                                      \
+               __ext->oe_grants, __ext->oe_nr_pages,                         \
+               list_empty_marker(&__ext->oe_pages),                          \
+               waitqueue_active(&__ext->oe_waitq) ? '+' : '-',               \
+               __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner,           \
+               /* ----- part 4 ----- */                                      \
+               ## __VA_ARGS__);                                              \
+       if (lvl == D_ERROR && __ext->oe_dlmlock != NULL)                      \
+               LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext);         \
+       else                                                                  \
+               LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext);         \
 } while (0)
 
 #undef EASSERTF
@@ -217,15 +223,19 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
        if (ext->oe_max_end < ext->oe_end || ext->oe_end < ext->oe_start)
                GOTO(out, rc = 80);
 
-       if (ext->oe_osclock == NULL && ext->oe_grants > 0)
+       if (ext->oe_sync && ext->oe_grants > 0)
                GOTO(out, rc = 90);
 
-       if (ext->oe_osclock) {
-               struct cl_lock_descr *descr;
-               descr = &ext->oe_osclock->cll_descr;
-               if (!(descr->cld_start <= ext->oe_start &&
-                     descr->cld_end >= ext->oe_max_end))
+       if (ext->oe_dlmlock != NULL) {
+               struct ldlm_extent *extent;
+
+               extent = &ext->oe_dlmlock->l_policy_data.l_extent;
+               if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
+                     extent->end   >= cl_offset(osc2cl(obj), ext->oe_max_end)))
                        GOTO(out, rc = 100);
+
+               if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP)))
+                       GOTO(out, rc = 102);
        }
 
        if (ext->oe_nr_pages > ext->oe_mppr)
@@ -321,7 +331,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
        ext->oe_state = OES_INV;
        INIT_LIST_HEAD(&ext->oe_pages);
        init_waitqueue_head(&ext->oe_waitq);
-       ext->oe_osclock = NULL;
+       ext->oe_dlmlock = NULL;
 
        return ext;
 }
@@ -347,9 +357,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
                LASSERT(ext->oe_state == OES_INV);
                LASSERT(!ext->oe_intree);
 
-               if (ext->oe_osclock) {
-                       cl_lock_put(env, ext->oe_osclock);
-                       ext->oe_osclock = NULL;
+               if (ext->oe_dlmlock != NULL) {
+                       lu_ref_add(&ext->oe_dlmlock->l_reference,
+                                  "osc_extent", ext);
+                       LDLM_LOCK_PUT(ext->oe_dlmlock);
+                       ext->oe_dlmlock = NULL;
                }
                osc_extent_free(ext);
        }
@@ -504,7 +516,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
        if (cur->oe_max_end != victim->oe_max_end)
                return -ERANGE;
 
-       LASSERT(cur->oe_osclock == victim->oe_osclock);
+       LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
        ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
        chunk_start = cur->oe_start >> ppc_bits;
        chunk_end   = cur->oe_end   >> ppc_bits;
@@ -587,10 +599,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
 struct osc_extent *osc_extent_find(const struct lu_env *env,
                                   struct osc_object *obj, pgoff_t index,
                                   int *grants)
-
 {
        struct client_obd *cli = osc_cli(obj);
-       struct cl_lock    *lock;
+       struct osc_lock   *olck;
+       struct cl_lock_descr *descr;
        struct osc_extent *cur;
        struct osc_extent *ext;
        struct osc_extent *conflict = NULL;
@@ -608,9 +620,12 @@ struct osc_extent *osc_extent_find(const struct lu_env *env,
        if (cur == NULL)
                RETURN(ERR_PTR(-ENOMEM));
 
-       lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
-       LASSERT(lock != NULL);
-       LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+       olck = osc_env_io(env)->oi_write_osclock;
+       LASSERTF(olck != NULL, "page %lu is not covered by lock\n", index);
+       LASSERT(olck->ols_state == OLS_GRANTED);
+
+       descr = &olck->ols_cl.cls_lock->cll_descr;
+       LASSERT(descr->cld_mode >= CLM_WRITE);
 
        LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
        ppc_bits   = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
@@ -622,19 +637,23 @@ struct osc_extent *osc_extent_find(const struct lu_env *env,
        max_pages = cli->cl_max_pages_per_rpc;
        LASSERT((max_pages & ~chunk_mask) == 0);
        max_end = index - (index % max_pages) + max_pages - 1;
-       max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+       max_end = min_t(pgoff_t, max_end, descr->cld_end);
 
        /* initialize new extent by parameters so far */
        cur->oe_max_end = max_end;
        cur->oe_start   = index & chunk_mask;
        cur->oe_end     = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
-       if (cur->oe_start < lock->cll_descr.cld_start)
-               cur->oe_start = lock->cll_descr.cld_start;
+       if (cur->oe_start < descr->cld_start)
+               cur->oe_start = descr->cld_start;
        if (cur->oe_end > max_end)
                cur->oe_end = max_end;
-       cur->oe_osclock = lock;
        cur->oe_grants  = 0;
        cur->oe_mppr    = max_pages;
+       if (olck->ols_dlmlock != NULL) {
+               LASSERT(olck->ols_hold);
+               cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
+               lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
+       }
 
        /* grants has been allocated by caller */
        LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
@@ -656,7 +675,7 @@ restart:
                        break;
 
                /* if covering by different locks, no chance to match */
-               if (lock != ext->oe_osclock) {
+               if (olck->ols_dlmlock != ext->oe_dlmlock) {
                        EASSERTF(!overlapped(ext, cur), ext,
                                 EXTSTR"\n", EXTPARA(cur));
 
@@ -752,7 +771,7 @@ restart:
        if (found != NULL) {
                LASSERT(conflict == NULL);
                if (!IS_ERR(found)) {
-                       LASSERT(found->oe_osclock == cur->oe_osclock);
+                       LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
                        OSC_EXTENT_DUMP(D_CACHE, found,
                                        "found caching ext for %lu.\n", index);
                }
@@ -767,7 +786,7 @@ restart:
                found = osc_extent_hold(cur);
                osc_extent_insert(obj, cur);
                OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
-                               index, lock->cll_descr.cld_end);
+                               index, descr->cld_end);
        }
        osc_object_unlock(obj);
 
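Reference-counting note, not part of the patch: with oe_osclock replaced by oe_dlmlock, an extent now pins its covering LDLM lock directly. The pairing, taken from the osc_extent_find() hunk above and the osc_extent_put() hunk earlier, is:

	/* on extent creation (osc_extent_find), pin the granted DLM lock */
	cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);

	/* on the final osc_extent_put(), return that reference */
	LDLM_LOCK_PUT(ext->oe_dlmlock);
	ext->oe_dlmlock = NULL;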
@@ -2616,6 +2635,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
        }
 
        ext->oe_rw = !!(cmd & OBD_BRW_READ);
+       ext->oe_sync = 1;
        ext->oe_urgent = 1;
        ext->oe_start = start;
        ext->oe_end = ext->oe_max_end = end;
@@ -3067,26 +3087,26 @@ static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
                                struct osc_page *ops, void *cbdata)
 {
        struct osc_thread_info *info = osc_env_info(env);
-       struct cl_lock *lock = cbdata;
+       struct osc_object *osc = cbdata;
        pgoff_t index;
 
        index = osc_index(ops);
        if (index >= info->oti_fn_index) {
-               struct cl_lock *tmp;
+               struct ldlm_lock *tmp;
                struct cl_page *page = ops->ops_cl.cpl_page;
 
                /* refresh non-overlapped index */
-               tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
-                                      lock, 1, 0);
+               tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
                if (tmp != NULL) {
+                       __u64 end = tmp->l_policy_data.l_extent.end;
                        /* Cache the first-non-overlapped index so as to skip
-                        * all pages within [index, oti_fn_index). This
-                        * is safe because if tmp lock is canceled, it will
-                        * discard these pages. */
-                       info->oti_fn_index = tmp->cll_descr.cld_end + 1;
-                       if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
+                        * all pages within [index, oti_fn_index). This is safe
+                        * because if tmp lock is canceled, it will discard
+                        * these pages. */
+                       info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
+                       if (end == OBD_OBJECT_EOF)
                                info->oti_fn_index = CL_PAGE_EOF;
-                       cl_lock_put(env, tmp);
+                       LDLM_LOCK_PUT(tmp);
                } else if (cl_page_own(env, io, page) == 0) {
                        /* discard the page */
                        cl_page_discard(env, io, page);
@@ -3104,11 +3124,8 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
                      struct osc_page *ops, void *cbdata)
 {
        struct osc_thread_info *info = osc_env_info(env);
-       struct cl_lock *lock = cbdata;
        struct cl_page *page = ops->ops_cl.cpl_page;
 
-       LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
-
        /* page is top page. */
        info->oti_next_index = osc_index(ops) + 1;
        if (cl_page_own(env, io, page) == 0) {
@@ -3133,32 +3150,29 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
  * If error happens on any step, the process continues anyway (the reasoning
  * behind this being that lock cancellation cannot be delayed indefinitely).
  */
-int osc_lock_discard_pages(const struct lu_env *env, struct osc_lock *ols)
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+                          pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
 {
        struct osc_thread_info *info = osc_env_info(env);
        struct cl_io *io = &info->oti_io;
-       struct cl_object *osc = ols->ols_cl.cls_obj;
-       struct cl_lock *lock = ols->ols_cl.cls_lock;
-       struct cl_lock_descr *descr = &lock->cll_descr;
        osc_page_gang_cbt cb;
        int res;
        int result;
 
        ENTRY;
 
-       io->ci_obj = cl_object_top(osc);
+       io->ci_obj = cl_object_top(osc2cl(osc));
        io->ci_ignore_layout = 1;
        result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (result != 0)
                GOTO(out, result);
 
-       cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
-       info->oti_fn_index = info->oti_next_index = descr->cld_start;
+       cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
+       info->oti_fn_index = info->oti_next_index = start;
        do {
-               res = osc_page_gang_lookup(env, io, cl2osc(osc),
-                                          info->oti_next_index, descr->cld_end,
-                                          cb, (void *)lock);
-               if (info->oti_next_index > descr->cld_end)
+               res = osc_page_gang_lookup(env, io, osc,
+                                          info->oti_next_index, end, cb, osc);
+               if (info->oti_next_index > end)
                        break;
 
                if (res == CLP_GANG_RESCHED)
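Usage sketch, not part of the patch: the reworked osc_lock_discard_pages() above no longer needs a cl_lock at all; callers pass the osc_object, the page range and the lock mode directly, and CLM_READ selects check_and_discard_cb() while any other mode selects discard_cb(). Names below are placeholders:

	int rc;

	rc = osc_lock_discard_pages(env, osc_obj, start_index, end_index,
				    CLM_WRITE);
	if (rc != 0)
		CDEBUG(D_DLMTRACE, "page discard failed: %d\n", rc);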
index 724a70a..6ed4179 100644 (file)
@@ -67,14 +67,17 @@ struct osc_io {
         struct cl_io_slice oi_cl;
         /** true if this io is lockless. */
         int                oi_lockless;
+       /** how many LRU pages are reserved for this IO */
+       int oi_lru_reserved;
+
        /** active extents, we know how many bytes is going to be written,
         * so having an active extent will prevent it from being fragmented */
        struct osc_extent *oi_active;
        /** partially truncated extent, we need to hold this extent to prevent
         * page writeback from happening. */
        struct osc_extent *oi_trunc;
-
-       int oi_lru_reserved;
+       /** write osc_lock for this IO, used by osc_extent_find(). */
+       struct osc_lock   *oi_write_osclock;
 
        struct obd_info    oi_info;
        struct obdo        oi_oa;
@@ -114,6 +117,7 @@ struct osc_thread_info {
         */
        pgoff_t                 oti_next_index;
        pgoff_t                 oti_fn_index; /* first non-overlapped index */
+       struct cl_sync_io       oti_anchor;
 };
 
 struct osc_object {
@@ -177,6 +181,10 @@ struct osc_object {
        struct radix_tree_root  oo_tree;
        spinlock_t              oo_tree_lock;
        unsigned long           oo_npages;
+
+       /* Protect osc_lock this osc_object has */
+       spinlock_t              oo_ol_spin;
+       struct list_head        oo_ol_list;
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
@@ -216,8 +224,6 @@ enum osc_lock_state {
         OLS_ENQUEUED,
         OLS_UPCALL_RECEIVED,
         OLS_GRANTED,
-        OLS_RELEASED,
-        OLS_BLOCKED,
         OLS_CANCELLED
 };
 
@@ -226,10 +232,8 @@ enum osc_lock_state {
  *
  * Interaction with DLM.
  *
- * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
- *
  * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
- * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
+ * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
  *
  * This pointer is protected through a reference, acquired by
  * osc_lock_upcall0(). Also, an additional reference is acquired by
@@ -266,17 +270,28 @@ enum osc_lock_state {
  * future.
  */
 struct osc_lock {
-        struct cl_lock_slice     ols_cl;
-        /** underlying DLM lock */
-        struct ldlm_lock        *ols_lock;
-        /** lock value block */
-        struct ost_lvb           ols_lvb;
-        /** DLM flags with which osc_lock::ols_lock was enqueued */
-       __u64                    ols_flags;
-        /** osc_lock::ols_lock handle */
-        struct lustre_handle     ols_handle;
-        struct ldlm_enqueue_info ols_einfo;
-        enum osc_lock_state      ols_state;
+       struct cl_lock_slice    ols_cl;
+       /** Internal lock to protect states, etc. */
+       spinlock_t              ols_lock;
+       /** Owner sleeps on this channel for state change */
+       struct cl_sync_io       *ols_owner;
+       /** waiting list for this lock to be cancelled */
+       struct list_head        ols_waiting_list;
+       /** wait entry of ols_waiting_list */
+       struct list_head        ols_wait_entry;
+       /** list entry for osc_object::oo_ol_list */
+       struct list_head        ols_nextlock_oscobj;
+
+       /** underlying DLM lock */
+       struct ldlm_lock        *ols_dlmlock;
+       /** DLM flags with which osc_lock::ols_lock was enqueued */
+       __u64                   ols_flags;
+       /** osc_lock::ols_lock handle */
+       struct lustre_handle     ols_handle;
+       struct ldlm_enqueue_info ols_einfo;
+       enum osc_lock_state      ols_state;
+       /** lock value block */
+       struct ost_lvb          ols_lvb;
 
         /**
          * true, if ldlm_lock_addref() was called against
@@ -307,16 +322,6 @@ struct osc_lock {
          */
                                  ols_locklessable:1,
         /**
-         * set by osc_lock_use() to wait until blocking AST enters into
-         * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
-         * further synchronization.
-         */
-                                 ols_ast_wait:1,
-        /**
-         * If the data of this lock has been flushed to server side.
-         */
-                                 ols_flush:1,
-        /**
          * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
          * the ENAVAIL error as tolerable; this lets the upper logic wait for
          * the glimpse locks sent to all OSTs to complete.
@@ -329,15 +334,6 @@ struct osc_lock {
          * For async glimpse lock.
          */
                                  ols_agl:1;
-        /**
-         * IO that owns this lock. This field is used for a dead-lock
-         * avoidance by osc_lock_enqueue_wait().
-         *
-         * XXX: unfortunately, the owner of a osc_lock is not unique, 
-         * the lock may have multiple users, if the lock is granted and
-         * then matched.
-         */
-        struct osc_io           *ols_owner;
 };
 
 
@@ -632,6 +628,8 @@ struct osc_extent {
        unsigned int            oe_intree:1,
        /** 0 is write, 1 is read */
                                oe_rw:1,
+       /** sync extent, queued by osc_queue_sync_pages() */
+                               oe_sync:1,
                                oe_srvlock:1,
                                oe_memalloc:1,
        /** an ACTIVE extent is going to be truncated, so when this extent
@@ -673,7 +671,7 @@ struct osc_extent {
         * state has changed. */
        wait_queue_head_t       oe_waitq;
        /** lock covering this extent */
-       struct cl_lock          *oe_osclock;
+       struct ldlm_lock        *oe_dlmlock;
        /** terminator of this extent. Must be true if this extent is in IO. */
        struct task_struct      *oe_owner;
        /** return value of writeback. If somebody is waiting for this extent,
@@ -687,14 +685,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
                      int sent, int rc);
 int osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
 
-int osc_lock_discard_pages(const struct lu_env *env, struct osc_lock *lock);
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+                          pgoff_t start, pgoff_t end, enum cl_lock_mode mode);
 
 typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
                                 struct osc_page *, void *);
 int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
                         struct osc_object *osc, pgoff_t start, pgoff_t end,
                         osc_page_gang_cbt cb, void *cbdata);
-
 /** @} osc */
 
 #endif /* OSC_CL_INTERNAL_H */
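Annotation: osc_lock_discard_pages() and osc_page_gang_lookup() above now take an osc_object plus an explicit [start, end] page-index range rather than an osc_lock, since cached pages are no longer reachable through a cl_lock. The stand-alone toy program below illustrates the same range-plus-callback walking pattern; the cache array and all identifiers are made up for the example and are not Lustre code.

    /* Walk every cached "page" inside [start, end] and hand it to a callback. */
    #include <stdio.h>

    typedef int (*page_cb_t)(unsigned long index, void *cbdata);

    /* pretend page cache: indices of resident pages */
    static const unsigned long cache[] = { 3, 7, 8, 15, 42 };

    int gang_lookup(unsigned long start, unsigned long end,
                    page_cb_t cb, void *cbdata)
    {
            unsigned int i;
            int rc = 0;

            for (i = 0; i < sizeof(cache) / sizeof(cache[0]); i++) {
                    if (cache[i] < start || cache[i] > end)
                            continue;
                    rc = cb(cache[i], cbdata);
                    if (rc != 0)
                            break;          /* callback aborted the walk */
            }
            return rc;
    }

    static int discard_cb(unsigned long index, void *cbdata)
    {
            (*(int *)cbdata)++;
            printf("discard page %lu\n", index);
            return 0;
    }

    int main(void)
    {
            int n = 0;

            gang_lookup(5, 20, discard_cb, &n);
            printf("%d pages discarded\n", n);
            return 0;
    }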
index ccba0e8..c353f1c 100644 (file)
@@ -106,13 +106,15 @@ void osc_update_next_shrink(struct client_obd *cli);
 
 extern struct ptlrpc_request_set *PTLRPCD_SET;
 
+typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
+                                   int rc);
+
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                     __u64 *flags, ldlm_policy_data_t *policy,
-                     struct ost_lvb *lvb, int kms_valid,
-                     obd_enqueue_update_f upcall,
-                     void *cookie, struct ldlm_enqueue_info *einfo,
-                     struct lustre_handle *lockh,
-                     struct ptlrpc_request_set *rqset, int async, int agl);
+                    struct ost_lvb *lvb, int kms_valid,
+                    osc_enqueue_upcall_f upcall,
+                    void *cookie, struct ldlm_enqueue_info *einfo,
+                    struct ptlrpc_request_set *rqset, int async, int agl);
 int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
 
 int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
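Annotation: the new osc_enqueue_upcall_f above hands the granted lock handle straight to the upcall, instead of requiring the caller to pass a lustre_handle into osc_enqueue_base() up front. A self-contained sketch of a conforming callback could look like the following; only the typedef itself is taken from the patch, struct lustre_handle is reduced to its cookie, and the enqueue reply is simulated.

    #include <stdio.h>

    struct lustre_handle {
            unsigned long long cookie;
    };

    typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
                                        int rc);

    /* plays the role of osc_lock_upcall(): record the granted lock handle */
    int sample_upcall(void *cookie, struct lustre_handle *lockh, int rc)
    {
            struct lustre_handle *saved = cookie;

            if (rc == 0)
                    *saved = *lockh;
            return rc;
    }

    int main(void)
    {
            struct lustre_handle granted = { .cookie = 0x1234 };
            struct lustre_handle saved = { 0 };
            osc_enqueue_upcall_f upcall = sample_upcall;

            /* a real enqueue would fire this once the server reply arrives */
            int rc = upcall(&saved, &granted, 0);

            printf("rc=%d handle=%#llx\n", rc, saved.cookie);
            return rc;
    }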
@@ -138,7 +140,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
                   long target, bool force);
 long osc_lru_reclaim(struct client_obd *cli);
 
-extern spinlock_t osc_ast_guard;
 unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
 
 int osc_cleanup(struct obd_device *obd);
@@ -206,7 +207,9 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
 int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
                    struct obd_quotactl *oqctl);
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
-
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+                                      struct osc_object *obj, pgoff_t index,
+                                      int pending, int canceling);
 void osc_inc_unstable_pages(struct ptlrpc_request *req);
 void osc_dec_unstable_pages(struct ptlrpc_request *req);
 bool osc_over_unstable_soft_limit(struct client_obd *cli);
index 61bc1a1..dca62a5 100644 (file)
@@ -361,6 +361,7 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
                atomic_long_add(oio->oi_lru_reserved, cli->cl_lru_left);
                oio->oi_lru_reserved = 0;
        }
+       oio->oi_write_osclock = NULL;
 }
 
 static int osc_io_fault_start(const struct lu_env *env,
@@ -765,8 +766,7 @@ static void osc_req_attr_set(const struct lu_env *env,
        struct lov_oinfo *oinfo;
        struct cl_req    *clerq;
        struct cl_page   *apage; /* _some_ page in @clerq */
-       struct cl_lock   *lock;  /* _some_ lock protecting @apage */
-       struct osc_lock  *olck;
+       struct ldlm_lock *lock;  /* _some_ lock protecting @apage */
        struct osc_page  *opg;
        struct obdo      *oa;
        struct ost_lvb   *lvb;
@@ -796,41 +796,37 @@ static void osc_req_attr_set(const struct lu_env *env,
                oa->o_valid |= OBD_MD_FLID;
        }
        if (flags & OBD_MD_FLHANDLE) {
-               struct cl_object *subobj;
-
                clerq = slice->crs_req;
                LASSERT(!list_empty(&clerq->crq_pages));
                apage = container_of(clerq->crq_pages.next,
                                     struct cl_page, cp_flight);
                opg = osc_cl_page_osc(apage, NULL);
-               subobj = opg->ops_cl.cpl_obj;
-               lock = cl_lock_at_pgoff(env, subobj, osc_index(opg),
-                                       NULL, 1, 1);
-               if (lock == NULL) {
-                       struct cl_object_header *head;
-                       struct cl_lock          *scan;
-
-                       head = cl_object_header(subobj);
-                       list_for_each_entry(scan, &head->coh_locks,
-                                           cll_linkage)
-                                CL_LOCK_DEBUG(D_ERROR, env, scan,
-                                              "no cover page!\n");
-                        CL_PAGE_DEBUG(D_ERROR, env, apage,
-                                      "dump uncover page!\n");
-                        libcfs_debug_dumpstack(NULL);
-                        LBUG();
-                }
+               lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
+                                           1, 1);
+               if (lock == NULL && !opg->ops_srvlock) {
+                       struct ldlm_resource *res;
+                       struct ldlm_res_id *resname;
+
+                       CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");
+
+                       resname = &osc_env_info(env)->oti_resname;
+                       ostid_build_res_name(&oinfo->loi_oi, resname);
+                       res = ldlm_resource_get(
+                               osc_export(cl2osc(obj))->exp_obd->obd_namespace,
+                               NULL, resname, LDLM_EXTENT, 0);
+                       ldlm_resource_dump(D_ERROR, res);
+
+                       libcfs_debug_dumpstack(NULL);
+                       LBUG();
+               }
 
 
-                olck = osc_lock_at(lock);
-                LASSERT(olck != NULL);
-                LASSERT(ergo(opg->ops_srvlock, olck->ols_lock == NULL));
-                /* check for lockless io. */
-                if (olck->ols_lock != NULL) {
-                        oa->o_handle = olck->ols_lock->l_remote_handle;
-                        oa->o_valid |= OBD_MD_FLHANDLE;
-                }
-                cl_lock_put(env, lock);
-        }
+               /* check for lockless io. */
+               if (lock != NULL) {
+                       oa->o_handle = lock->l_remote_handle;
+                       oa->o_valid |= OBD_MD_FLHANDLE;
+                       LDLM_LOCK_PUT(lock);
+               }
+       }
 }
 
 static const struct cl_req_operations osc_req_ops = {
index 0f46193..78a4653 100644 (file)
@@ -61,7 +61,6 @@ static const struct cl_lock_operations osc_lock_ops;
 static const struct cl_lock_operations osc_lock_lockless_ops;
 static void osc_lock_to_lockless(const struct lu_env *env,
                                  struct osc_lock *ols, int force);
-static bool osc_lock_has_pages(struct osc_lock *olck);
 
 int osc_lock_is_lockless(const struct osc_lock *olck)
 {
@@ -89,11 +88,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
 static int osc_lock_invariant(struct osc_lock *ols)
 {
        struct ldlm_lock *lock        = osc_handle_ptr(&ols->ols_handle);
-       struct ldlm_lock *olock       = ols->ols_lock;
+       struct ldlm_lock *olock       = ols->ols_dlmlock;
        int               handle_used = lustre_handle_is_used(&ols->ols_handle);
 
        if (ergo(osc_lock_is_lockless(ols),
-                ols->ols_locklessable && ols->ols_lock == NULL))
+                ols->ols_locklessable && ols->ols_dlmlock == NULL))
                return 1;
 
        /*
@@ -111,7 +110,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
                   ergo(lock == NULL, olock == NULL)))
                return 0;
        /*
-        * Check that ->ols_handle and ->ols_lock are consistent, but
+        * Check that ->ols_handle and ->ols_dlmlock are consistent, but
         * take into account that they are set at the different time.
         */
        if (! ergo(ols->ols_state == OLS_CANCELLED,
@@ -139,115 +138,15 @@ static int osc_lock_invariant(struct osc_lock *ols)
  *
  */
 
-/**
- * Breaks a link between osc_lock and dlm_lock.
- */
-static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
-{
-       struct ldlm_lock *dlmlock;
-
-       spin_lock(&osc_ast_guard);
-       dlmlock = olck->ols_lock;
-       if (dlmlock == NULL) {
-               spin_unlock(&osc_ast_guard);
-                return;
-        }
-
-        olck->ols_lock = NULL;
-        /* wb(); --- for all who checks (ols->ols_lock != NULL) before
-         * call to osc_lock_detach() */
-        dlmlock->l_ast_data = NULL;
-        olck->ols_handle.cookie = 0ULL;
-       spin_unlock(&osc_ast_guard);
-
-        lock_res_and_lock(dlmlock);
-        if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
-                struct cl_object *obj = olck->ols_cl.cls_obj;
-                struct cl_attr *attr  = &osc_env_info(env)->oti_attr;
-                __u64 old_kms;
-
-                cl_object_attr_lock(obj);
-                /* Must get the value under the lock to avoid possible races. */
-                old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
-                /* Update the kms. Need to loop all granted locks.
-                 * Not a problem for the client */
-                attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
-
-                cl_object_attr_set(env, obj, attr, CAT_KMS);
-                cl_object_attr_unlock(obj);
-        }
-        unlock_res_and_lock(dlmlock);
-
-        /* release a reference taken in osc_lock_upcall0(). */
-        LASSERT(olck->ols_has_ref);
-        lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
-        LDLM_LOCK_RELEASE(dlmlock);
-        olck->ols_has_ref = 0;
-}
-
-static int osc_lock_unhold(struct osc_lock *ols)
-{
-        int result = 0;
-
-        if (ols->ols_hold) {
-                ols->ols_hold = 0;
-                result = osc_cancel_base(&ols->ols_handle,
-                                         ols->ols_einfo.ei_mode);
-        }
-        return result;
-}
-
-static int osc_lock_unuse(const struct lu_env *env,
-                          const struct cl_lock_slice *slice)
-{
-        struct osc_lock *ols = cl2osc_lock(slice);
-
-        LINVRNT(osc_lock_invariant(ols));
-
-        switch (ols->ols_state) {
-        case OLS_NEW:
-                LASSERT(!ols->ols_hold);
-                LASSERT(ols->ols_agl);
-                return 0;
-       case OLS_UPCALL_RECEIVED:
-               osc_lock_unhold(ols);
-       case OLS_ENQUEUED:
-               LASSERT(!ols->ols_hold);
-               osc_lock_detach(env, ols);
-               ols->ols_state = OLS_NEW;
-               return 0;
-        case OLS_GRANTED:
-                LASSERT(!ols->ols_glimpse);
-                LASSERT(ols->ols_hold);
-                /*
-                 * Move lock into OLS_RELEASED state before calling
-                 * osc_cancel_base() so that possible synchronous cancellation
-                 * (that always happens e.g., for liblustre) sees that lock is
-                 * released.
-                 */
-                ols->ols_state = OLS_RELEASED;
-                return osc_lock_unhold(ols);
-        default:
-                CERROR("Impossible state: %d\n", ols->ols_state);
-                LBUG();
-        }
-}
-
 static void osc_lock_fini(const struct lu_env *env,
                           struct cl_lock_slice *slice)
 {
-        struct osc_lock  *ols = cl2osc_lock(slice);
-
-        LINVRNT(osc_lock_invariant(ols));
-        /*
-         * ->ols_hold can still be true at this point if, for example, a
-         * thread that requested a lock was killed (and released a reference
-         * to the lock), before reply from a server was received. In this case
-         * lock is destroyed immediately after upcall.
-         */
-        osc_lock_unhold(ols);
-        LASSERT(ols->ols_lock == NULL);
-        OBD_SLAB_FREE_PTR(ols, osc_lock_kmem);
+       struct osc_lock  *ols = cl2osc_lock(slice);
+
+       LINVRNT(osc_lock_invariant(ols));
+       LASSERT(ols->ols_dlmlock == NULL);
+
+       OBD_SLAB_FREE_PTR(ols, osc_lock_kmem);
 }
 
 static void osc_lock_build_policy(const struct lu_env *env,
@@ -272,55 +171,12 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
                result |= LDLM_FL_HAS_INTENT;
        if (enqflags & CEF_DISCARD_DATA)
                result |= LDLM_FL_AST_DISCARD_DATA;
+       if (enqflags & CEF_PEEK)
+               result |= LDLM_FL_TEST_LOCK;
        return result;
 }
 
 /**
- * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
- * pointers. Initialized in osc_init().
- */
-spinlock_t osc_ast_guard;
-
-static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
-{
-       struct osc_lock *olck;
-
-       lock_res_and_lock(dlm_lock);
-       spin_lock(&osc_ast_guard);
-        olck = dlm_lock->l_ast_data;
-        if (olck != NULL) {
-                struct cl_lock *lock = olck->ols_cl.cls_lock;
-                /*
-                 * If osc_lock holds a reference on ldlm lock, return it even
-                 * when cl_lock is in CLS_FREEING state. This way
-                 *
-                 *         osc_ast_data_get(dlmlock) == NULL
-                 *
-                 * guarantees that all osc references on dlmlock were
-                 * released. osc_dlm_blocking_ast0() relies on that.
-                 */
-               if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
-                       cl_lock_get_trust(lock);
-                       lu_ref_add_atomic(&lock->cll_reference,
-                                         "ast", current);
-               } else
-                       olck = NULL;
-        }
-       spin_unlock(&osc_ast_guard);
-       unlock_res_and_lock(dlm_lock);
-       return olck;
-}
-
-static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
-{
-       struct cl_lock *lock;
-
-       lock = olck->ols_cl.cls_lock;
-       lu_ref_del(&lock->cll_reference, "ast", current);
-       cl_lock_put(env, lock);
-}
-
-/**
  * Updates object attributes from a lock value block (lvb) received together
  * with the DLM lock reply from the server. Copy of osc_update_enqueue()
  * logic.
@@ -330,38 +186,34 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
  *
  * Called under lock and resource spin-locks.
  */
-static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
-                                int rc)
+static void osc_lock_lvb_update(const struct lu_env *env,
+                               struct osc_object *osc,
+                               struct ldlm_lock *dlmlock,
+                               struct ost_lvb *lvb)
 {
-        struct ost_lvb    *lvb;
-        struct cl_object  *obj;
-        struct lov_oinfo  *oinfo;
-        struct cl_attr    *attr;
-        unsigned           valid;
+       struct cl_object  *obj = osc2cl(osc);
+       struct lov_oinfo  *oinfo = osc->oo_oinfo;
+       struct cl_attr    *attr = &osc_env_info(env)->oti_attr;
+       unsigned           valid;
 
 
-        ENTRY;
-
-        if (!(olck->ols_flags & LDLM_FL_LVB_READY))
-                RETURN_EXIT;
+       ENTRY;
 
 
-        lvb   = &olck->ols_lvb;
-        obj   = olck->ols_cl.cls_obj;
-        oinfo = cl2osc(obj)->oo_oinfo;
-        attr  = &osc_env_info(env)->oti_attr;
-        valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
-        cl_lvb2attr(attr, lvb);
+       valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
+       if (lvb == NULL) {
+               LASSERT(dlmlock != NULL);
+               lvb = dlmlock->l_lvb_data;
+       }
+       cl_lvb2attr(attr, lvb);
 
 
-        cl_object_attr_lock(obj);
-        if (rc == 0) {
-                struct ldlm_lock  *dlmlock;
-                __u64 size;
+       cl_object_attr_lock(obj);
+       if (dlmlock != NULL) {
+               __u64 size;
 
 
-                dlmlock = olck->ols_lock;
-                LASSERT(dlmlock != NULL);
+               check_res_locked(dlmlock->l_resource);
 
 
-                /* re-grab LVB from a dlm lock under DLM spin-locks. */
-                *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+               LASSERT(lvb == dlmlock->l_lvb_data);
                 size = lvb->lvb_size;
+
                 /* Extend KMS up to the end of this lock and no further
                  * A lock on [x,y] means a KMS of up to y + 1 bytes! */
                 if (size > dlmlock->l_policy_data.l_extent.end)
@@ -377,107 +229,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
                                    lvb->lvb_size, oinfo->loi_kms,
                                    dlmlock->l_policy_data.l_extent.end);
                 }
-                ldlm_lock_allow_match_locked(dlmlock);
-        } else if (rc == -ENAVAIL && olck->ols_glimpse) {
-                CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
-                       " kms="LPU64"\n", lvb->lvb_size, oinfo->loi_kms);
-        } else
-                valid = 0;
-
-        if (valid != 0)
-                cl_object_attr_set(env, obj, attr, valid);
+               ldlm_lock_allow_match_locked(dlmlock);
+       }
 
 
-        cl_object_attr_unlock(obj);
+       cl_object_attr_set(env, obj, attr, valid);
+       cl_object_attr_unlock(obj);
 
 
-        EXIT;
+       EXIT;
 }
 
-/**
- * Called when a lock is granted, from an upcall (when server returned a
- * granted lock), or from completion AST, when server returned a blocked lock.
- *
- * Called under lock and resource spin-locks, that are released temporarily
- * here.
- */
-static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
-                             struct ldlm_lock *dlmlock, int rc)
+static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
+                            struct lustre_handle *lockh, bool lvb_update)
 {
 {
-        struct ldlm_extent   *ext;
-        struct cl_lock       *lock;
-        struct cl_lock_descr *descr;
-
-        LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
-
-        ENTRY;
-        if (olck->ols_state < OLS_GRANTED) {
-                lock  = olck->ols_cl.cls_lock;
-                ext   = &dlmlock->l_policy_data.l_extent;
-                descr = &osc_env_info(env)->oti_descr;
-                descr->cld_obj = lock->cll_descr.cld_obj;
-
-                /* XXX check that ->l_granted_mode is valid. */
-                descr->cld_mode  = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
-                descr->cld_start = cl_index(descr->cld_obj, ext->start);
-                descr->cld_end   = cl_index(descr->cld_obj, ext->end);
-                descr->cld_gid   = ext->gid;
-                /*
-                 * tell upper layers the extent of the lock that was actually
-                 * granted
-                 */
-                olck->ols_state = OLS_GRANTED;
-                osc_lock_lvb_update(env, olck, rc);
-
-                /* release DLM spin-locks to allow cl_lock_{modify,signal}()
-                 * to take a semaphore on a parent lock. This is safe, because
-                 * spin-locks are needed to protect consistency of
-                 * dlmlock->l_*_mode and LVB, and we have finished processing
-                 * them. */
-                unlock_res_and_lock(dlmlock);
-                cl_lock_modify(env, lock, descr);
-                cl_lock_signal(env, lock);
-                LINVRNT(osc_lock_invariant(olck));
-                lock_res_and_lock(dlmlock);
-        }
-        EXIT;
-}
+       struct ldlm_lock *dlmlock;
 
 
-static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
+       dlmlock = ldlm_handle2lock_long(lockh, 0);
+       LASSERT(dlmlock != NULL);
 
 
-{
-        struct ldlm_lock *dlmlock;
+       /* lock reference taken by ldlm_handle2lock_long() is
+        * owned by osc_lock and released in osc_lock_detach()
+        */
+       lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
+       oscl->ols_has_ref = 1;
+
+       LASSERT(oscl->ols_dlmlock == NULL);
+       oscl->ols_dlmlock = dlmlock;
+
+       /* This may be a matched lock for glimpse request, do not hold
+        * lock reference in that case. */
+       if (!oscl->ols_glimpse) {
+               /* hold a refc for non glimpse lock which will
+                * be released in osc_lock_cancel() */
+               lustre_handle_copy(&oscl->ols_handle, lockh);
+               ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
+               oscl->ols_hold = 1;
+       }
 
 
-        ENTRY;
+       /* Lock must have been granted. */
+       lock_res_and_lock(dlmlock);
+       if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
+               struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
+               struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
+
+               /* extend the lock extent, otherwise it will have problem when
+                * we decide whether to grant a lockless lock. */
+               descr->cld_mode  = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
+               descr->cld_start = cl_index(descr->cld_obj, ext->start);
+               descr->cld_end   = cl_index(descr->cld_obj, ext->end);
+               descr->cld_gid   = ext->gid;
+
+               /* no lvb update for matched lock */
+               if (lvb_update) {
+                       LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+                       osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
+                                           dlmlock, NULL);
+               }
+               LINVRNT(osc_lock_invariant(oscl));
+       }
+       unlock_res_and_lock(dlmlock);
 
 
-        dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
-        LASSERT(dlmlock != NULL);
-
-        lock_res_and_lock(dlmlock);
-       spin_lock(&osc_ast_guard);
-       LASSERT(dlmlock->l_ast_data == olck);
-       LASSERT(olck->ols_lock == NULL);
-       olck->ols_lock = dlmlock;
-       spin_unlock(&osc_ast_guard);
-
-        /*
-         * Lock might be not yet granted. In this case, completion ast
-         * (osc_ldlm_completion_ast()) comes later and finishes lock
-         * granting.
-         */
-        if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
-                osc_lock_granted(env, olck, dlmlock, 0);
-        unlock_res_and_lock(dlmlock);
-
-        /*
-         * osc_enqueue_interpret() decrefs asynchronous locks, counter
-         * this.
-         */
-        ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
-        olck->ols_hold = 1;
-
-        /* lock reference taken by ldlm_handle2lock_long() is owned by
-         * osc_lock and released in osc_lock_detach() */
-        lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
-        olck->ols_has_ref = 1;
+       LASSERT(oscl->ols_state != OLS_GRANTED);
+       oscl->ols_state = OLS_GRANTED;
 }
 
 /**
@@ -485,145 +297,127 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
  * received from a server, or after osc_enqueue_base() matched a local DLM
  * lock.
  */
-static int osc_lock_upcall(void *cookie, int errcode)
+static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
+                          int errcode)
 {
-        struct osc_lock         *olck  = cookie;
-        struct cl_lock_slice    *slice = &olck->ols_cl;
-        struct cl_lock          *lock  = slice->cls_lock;
-        struct lu_env           *env;
-        struct cl_env_nest       nest;
+       struct osc_lock         *oscl  = cookie;
+       struct cl_lock_slice    *slice = &oscl->ols_cl;
+       struct lu_env           *env;
+       struct cl_env_nest      nest;
+       int                     rc;
 
 
-        ENTRY;
-        env = cl_env_nested_get(&nest);
-        if (!IS_ERR(env)) {
-                int rc;
-
-                cl_lock_mutex_get(env, lock);
-
-                LASSERT(lock->cll_state >= CLS_QUEUING);
-                if (olck->ols_state == OLS_ENQUEUED) {
-                        olck->ols_state = OLS_UPCALL_RECEIVED;
-                        rc = ldlm_error2errno(errcode);
-                } else if (olck->ols_state == OLS_CANCELLED) {
-                        rc = -EIO;
-                } else {
-                        CERROR("Impossible state: %d\n", olck->ols_state);
-                        LBUG();
-                }
-                if (rc) {
-                        struct ldlm_lock *dlmlock;
-
-                        dlmlock = ldlm_handle2lock(&olck->ols_handle);
-                        if (dlmlock != NULL) {
-                                lock_res_and_lock(dlmlock);
-                               spin_lock(&osc_ast_guard);
-                               LASSERT(olck->ols_lock == NULL);
-                               dlmlock->l_ast_data = NULL;
-                               olck->ols_handle.cookie = 0ULL;
-                               spin_unlock(&osc_ast_guard);
-                                ldlm_lock_fail_match_locked(dlmlock);
-                                unlock_res_and_lock(dlmlock);
-                                LDLM_LOCK_PUT(dlmlock);
-                        }
-                } else {
-                        if (olck->ols_glimpse)
-                                olck->ols_glimpse = 0;
-                        osc_lock_upcall0(env, olck);
-                }
+       ENTRY;
 
 
-                /* Error handling, some errors are tolerable. */
-                if (olck->ols_locklessable && rc == -EUSERS) {
-                        /* This is a tolerable error, turn this lock into
-                         * lockless lock.
-                         */
-                        osc_object_set_contended(cl2osc(slice->cls_obj));
-                        LASSERT(slice->cls_ops == &osc_lock_ops);
-
-                        /* Change this lock to ldlmlock-less lock. */
-                        osc_lock_to_lockless(env, olck, 1);
-                        olck->ols_state = OLS_GRANTED;
-                        rc = 0;
-                } else if (olck->ols_glimpse && rc == -ENAVAIL) {
-                        osc_lock_lvb_update(env, olck, rc);
-                        cl_lock_delete(env, lock);
-                        /* Hide the error. */
-                        rc = 0;
-                }
+       env = cl_env_nested_get(&nest);
+       /* should never happen, similar to osc_ldlm_blocking_ast(). */
+       LASSERT(!IS_ERR(env));
+
+       rc = ldlm_error2errno(errcode);
+       if (oscl->ols_state == OLS_ENQUEUED) {
+               oscl->ols_state = OLS_UPCALL_RECEIVED;
+       } else if (oscl->ols_state == OLS_CANCELLED) {
+               rc = -EIO;
+       } else {
+               CERROR("Impossible state: %d\n", oscl->ols_state);
+               LBUG();
+       }
 
 
-                if (rc == 0) {
-                        /* For AGL case, the RPC sponsor may exits the cl_lock
-                        *  processing without wait() called before related OSC
-                        *  lock upcall(). So update the lock status according
-                        *  to the enqueue result inside AGL upcall(). */
-                        if (olck->ols_agl) {
-                                lock->cll_flags |= CLF_FROM_UPCALL;
-                                cl_wait_try(env, lock);
-                                lock->cll_flags &= ~CLF_FROM_UPCALL;
-                        }
-                        cl_lock_signal(env, lock);
-                        /* del user for lock upcall cookie */
-                        if (olck->ols_agl) {
-                                if (!olck->ols_glimpse)
-                                        olck->ols_agl = 0;
-                               cl_unuse_try(env, lock);
-                       }
-                } else {
-                        /* del user for lock upcall cookie */
-                        if (olck->ols_agl)
-                               cl_lock_user_del(env, lock);
-                        cl_lock_error(env, lock, rc);
-                }
+       if (rc == 0)
+               osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
 
 
-               /* release cookie reference, acquired by osc_lock_enqueue() */
-               cl_lock_hold_release(env, lock, "upcall", lock);
-               cl_lock_mutex_put(env, lock);
+       /* Error handling, some errors are tolerable. */
+       if (oscl->ols_locklessable && rc == -EUSERS) {
+               /* This is a tolerable error, turn this lock into
+                * lockless lock.
+                */
+               osc_object_set_contended(cl2osc(slice->cls_obj));
+               LASSERT(slice->cls_ops == &osc_lock_ops);
+
+               /* Change this lock to ldlmlock-less lock. */
+               osc_lock_to_lockless(env, oscl, 1);
+               oscl->ols_state = OLS_GRANTED;
+               rc = 0;
+       } else if (oscl->ols_glimpse && rc == -ENAVAIL) {
+               LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+               osc_lock_lvb_update(env, cl2osc(slice->cls_obj),
+                                   NULL, &oscl->ols_lvb);
+               /* Hide the error. */
+               rc = 0;
+       }
 
 
-               lu_ref_del(&lock->cll_reference, "upcall", lock);
-               /* This maybe the last reference, so must be called after
-                * cl_lock_mutex_put(). */
-               cl_lock_put(env, lock);
+       if (oscl->ols_owner != NULL)
+               cl_sync_io_note(env, oscl->ols_owner, rc);
+       cl_env_nested_put(&nest, env);
 
 
-               cl_env_nested_put(&nest, env);
-       } else {
-               /* should never happen, similar to osc_ldlm_blocking_ast(). */
-               LBUG();
-       }
-       RETURN(errcode);
+       RETURN(rc);
 }
 
-/**
- * Core of osc_dlm_blocking_ast() logic.
- */
-static void osc_lock_blocking(const struct lu_env *env,
-                              struct ldlm_lock *dlmlock,
-                              struct osc_lock *olck, int blocking)
+static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
+                              int errcode)
 {
-        struct cl_lock *lock = olck->ols_cl.cls_lock;
-
-        LASSERT(olck->ols_lock == dlmlock);
-        CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
-        LASSERT(!osc_lock_is_lockless(olck));
-
-        /*
-         * Lock might be still addref-ed here, if e.g., blocking ast
-         * is sent for a failed lock.
-         */
-        osc_lock_unhold(olck);
-
-        if (blocking && olck->ols_state < OLS_BLOCKED)
-                /*
-                 * Move osc_lock into OLS_BLOCKED before canceling the lock,
-                 * because it recursively re-enters osc_lock_blocking(), with
-                 * the state set to OLS_CANCELLED.
-                 */
-                olck->ols_state = OLS_BLOCKED;
-        /*
-         * cancel and destroy lock at least once no matter how blocking ast is
-         * entered (see comment above osc_ldlm_blocking_ast() for use
-         * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
-         */
-        cl_lock_cancel(env, lock);
-        cl_lock_delete(env, lock);
+       struct osc_object       *osc = cookie;
+       struct ldlm_lock        *dlmlock;
+       struct lu_env           *env;
+       struct cl_env_nest       nest;
+       ENTRY;
+
+       env = cl_env_nested_get(&nest);
+       LASSERT(!IS_ERR(env));
+
+       if (errcode == ELDLM_LOCK_MATCHED)
+               GOTO(out, errcode = ELDLM_OK);
+
+       if (errcode != ELDLM_OK)
+               GOTO(out, errcode);
+
+       dlmlock = ldlm_handle2lock(lockh);
+       LASSERT(dlmlock != NULL);
+
+       lock_res_and_lock(dlmlock);
+       LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+
+       /* there is no osc_lock associated with AGL lock */
+       osc_lock_lvb_update(env, osc, dlmlock, NULL);
+
+       unlock_res_and_lock(dlmlock);
+       LDLM_LOCK_PUT(dlmlock);
+
+out:
+       cl_object_put(env, osc2cl(osc));
+       cl_env_nested_put(&nest, env);
+       RETURN(ldlm_error2errno(errcode));
+}
+
+static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
+                         enum cl_lock_mode mode, int discard)
+{
+       struct lu_env           *env;
+       struct cl_env_nest      nest;
+       int                     rc = 0;
+       int                     rc2 = 0;
+
+       ENTRY;
+
+       env = cl_env_nested_get(&nest);
+       if (IS_ERR(env))
+               RETURN(PTR_ERR(env));
+
+       if (mode == CLM_WRITE) {
+               rc = osc_cache_writeback_range(env, obj, start, end, 1,
+                                              discard);
+               CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
+                      obj, start, end, rc,
+                      discard ? "discarded" : "written back");
+               if (rc > 0)
+                       rc = 0;
+       }
+
+       rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
+       if (rc == 0 && rc2 < 0)
+               rc = rc2;
+
+       cl_env_nested_put(&nest, env);
+       RETURN(rc);
 }
 
 /**
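Annotation: the new osc_lock_flush() above performs two steps on cancellation. For write locks it first writes back (or discards) dirty pages via osc_cache_writeback_range(), then it drops cached pages with osc_lock_discard_pages(), returning the first error while treating a positive write-back count as success. A small stand-alone sketch of that "run both steps, report the first error" pattern, with stub functions in place of the cache calls (all names invented):

    #include <stdio.h>

    static int writeback_range(unsigned long start, unsigned long end)
    {
            (void)start; (void)end;
            return 3;                       /* pretend 3 pages written back */
    }

    static int discard_range(unsigned long start, unsigned long end)
    {
            (void)start; (void)end;
            return 0;
    }

    int flush_range(unsigned long start, unsigned long end, int write_mode)
    {
            int rc = 0;
            int rc2;

            if (write_mode) {
                    rc = writeback_range(start, end);
                    if (rc > 0)             /* a positive count is not an error */
                            rc = 0;
            }

            rc2 = discard_range(start, end);
            if (rc == 0 && rc2 < 0)
                    rc = rc2;
            return rc;
    }

    int main(void)
    {
            printf("flush rc=%d\n", flush_range(0, 255, 1));
            return 0;
    }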
@@ -634,66 +428,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
                                  struct ldlm_lock *dlmlock,
                                  void *data, int flag)
 {
-        struct osc_lock *olck;
-        struct cl_lock  *lock;
-        int result;
-        int cancel;
-
-        LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);
-
-        cancel = 0;
-        olck = osc_ast_data_get(dlmlock);
-        if (olck != NULL) {
-                lock = olck->ols_cl.cls_lock;
-                cl_lock_mutex_get(env, lock);
-                LINVRNT(osc_lock_invariant(olck));
-                if (olck->ols_ast_wait) {
-                        /* wake up osc_lock_use() */
-                        cl_lock_signal(env, lock);
-                        olck->ols_ast_wait = 0;
-                }
-                /*
-                 * Lock might have been canceled while this thread was
-                 * sleeping for lock mutex, but olck is pinned in memory.
-                 */
-                if (olck == dlmlock->l_ast_data) {
-                        /*
-                         * NOTE: DLM sends blocking AST's for failed locks
-                         *       (that are still in pre-OLS_GRANTED state)
-                         *       too, and they have to be canceled otherwise
-                         *       DLM lock is never destroyed and stuck in
-                         *       the memory.
-                         *
-                         *       Alternatively, ldlm_cli_cancel() can be
-                         *       called here directly for osc_locks with
-                         *       ols_state < OLS_GRANTED to maintain an
-                         *       invariant that ->clo_cancel() is only called
-                         *       for locks that were granted.
-                         */
-                        LASSERT(data == olck);
-                        osc_lock_blocking(env, dlmlock,
-                                          olck, flag == LDLM_CB_BLOCKING);
-                } else
-                        cancel = 1;
-                cl_lock_mutex_put(env, lock);
-                osc_ast_data_put(env, olck);
-        } else
-                /*
-                 * DLM lock exists, but there is no cl_lock attached to it.
-                 * This is a `normal' race. cl_object and its cl_lock's can be
-                 * removed by memory pressure, together with all pages.
-                 */
-                cancel = (flag == LDLM_CB_BLOCKING);
-
-        if (cancel) {
-                struct lustre_handle *lockh;
-
-                lockh = &osc_env_info(env)->oti_handle;
-                ldlm_lock2handle(dlmlock, lockh);
-               result = ldlm_cli_cancel(lockh, LCF_ASYNC);
-        } else
-                result = 0;
-        return result;
+       struct cl_object        *obj = NULL;
+       int                     result = 0;
+       int                     discard;
+       enum cl_lock_mode       mode = CLM_READ;
+       ENTRY;
+
+       LASSERT(flag == LDLM_CB_CANCELING);
+
+       lock_res_and_lock(dlmlock);
+       if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
+               dlmlock->l_ast_data = NULL;
+               unlock_res_and_lock(dlmlock);
+               RETURN(0);
+       }
+
+       discard = ldlm_is_discard_data(dlmlock);
+       if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
+               mode = CLM_WRITE;
+
+       if (dlmlock->l_ast_data != NULL) {
+               obj = osc2cl(dlmlock->l_ast_data);
+               dlmlock->l_ast_data = NULL;
+
+               cl_object_get(obj);
+       }
+
+       unlock_res_and_lock(dlmlock);
+
+       /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
+        * the object has been destroyed. */
+       if (obj != NULL) {
+               struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
+               struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+               __u64 old_kms;
+
+               /* Destroy pages covered by the extent of the DLM lock */
+               result = osc_lock_flush(cl2osc(obj),
+                                       cl_index(obj, extent->start),
+                                       cl_index(obj, extent->end),
+                                       mode, discard);
+
+               /* losing a lock, update kms */
+               lock_res_and_lock(dlmlock);
+               cl_object_attr_lock(obj);
+               /* Must get the value under the lock to avoid race. */
+               old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
+               /* Update the kms. Need to loop all granted locks.
+                * Not a problem for the client */
+               attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
+
+               cl_object_attr_set(env, obj, attr, CAT_KMS);
+               cl_object_attr_unlock(obj);
+               unlock_res_and_lock(dlmlock);
+
+               cl_object_put(env, obj);
+       }
+       RETURN(result);
 }
 
 /**
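Annotation: in osc_dlm_blocking_ast0() above, losing a granted lock means the client can no longer trust the size information that lock covered, so the known minimum size (KMS) is recomputed with ldlm_extent_shift_kms() from the locks that remain granted. The toy function below works through that idea on plain arrays; it is only a sketch of the concept, not the ldlm implementation.

    #include <stdio.h>

    struct extent {
            unsigned long long start;
            unsigned long long end;         /* inclusive, so KMS is end + 1 */
    };

    /* largest end + 1 among the surviving locks, never growing past old_kms */
    unsigned long long shift_kms(const struct extent *granted, int count,
                                 int dying, unsigned long long old_kms)
    {
            unsigned long long kms = 0;
            int i;

            for (i = 0; i < count; i++) {
                    if (i == dying)
                            continue;
                    if (granted[i].end + 1 > kms)
                            kms = granted[i].end + 1;
            }
            return kms < old_kms ? kms : old_kms;
    }

    int main(void)
    {
            struct extent locks[] = {
                    { 0,    4095 },
                    { 4096, 65535 },        /* being cancelled */
            };

            printf("new kms=%llu\n", shift_kms(locks, 2, 1, 65536ULL));
            return 0;
    }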
@@ -742,128 +533,75 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
                                  struct ldlm_lock_desc *new, void *data,
                                  int flag)
 {
-        struct lu_env     *env;
-        struct cl_env_nest nest;
-        int                result;
-
-        /*
-         * This can be called in the context of outer IO, e.g.,
-         *
-         *     cl_enqueue()->...
-         *       ->osc_enqueue_base()->...
-         *         ->ldlm_prep_elc_req()->...
-         *           ->ldlm_cancel_callback()->...
-         *             ->osc_ldlm_blocking_ast()
-         *
-         * new environment has to be created to not corrupt outer context.
-         */
-        env = cl_env_nested_get(&nest);
-        if (!IS_ERR(env)) {
-                result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
-                cl_env_nested_put(&nest, env);
-        } else {
-                result = PTR_ERR(env);
-                /*
-                 * XXX This should never happen, as cl_lock is
-                 * stuck. Pre-allocated environment a la vvp_inode_fini_env
-                 * should be used.
-                 */
-                LBUG();
-        }
-        if (result != 0) {
-                if (result == -ENODATA)
-                        result = 0;
-                else
-                        CERROR("BAST failed: %d\n", result);
-        }
-        return result;
-}
+       int result = 0;
+       ENTRY;
 
 
-static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
-                                  __u64 flags, void *data)
-{
-        struct cl_env_nest nest;
-        struct lu_env     *env;
-        struct osc_lock   *olck;
-        struct cl_lock    *lock;
-        int result;
-        int dlmrc;
-
-       /* first, do dlm part of the work */
-       dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
-       if (flags == LDLM_FL_WAIT_NOREPROC)
-               return dlmrc;
-
-        /* then, notify cl_lock */
-        env = cl_env_nested_get(&nest);
-        if (!IS_ERR(env)) {
-                olck = osc_ast_data_get(dlmlock);
-                if (olck != NULL) {
-                        lock = olck->ols_cl.cls_lock;
-                        cl_lock_mutex_get(env, lock);
-                        /*
-                         * ldlm_handle_cp_callback() copied LVB from request
-                         * to lock->l_lvb_data, store it in osc_lock.
-                         */
-                        LASSERT(dlmlock->l_lvb_data != NULL);
-                        lock_res_and_lock(dlmlock);
-                        olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
-                        if (olck->ols_lock == NULL) {
-                                /*
-                                 * upcall (osc_lock_upcall()) hasn't yet been
-                                 * called. Do nothing now, upcall will bind
-                                 * olck to dlmlock and signal the waiters.
-                                 *
-                                 * This maintains an invariant that osc_lock
-                                 * and ldlm_lock are always bound when
-                                 * osc_lock is in OLS_GRANTED state.
-                                 */
-                        } else if (dlmlock->l_granted_mode ==
-                                   dlmlock->l_req_mode) {
-                                osc_lock_granted(env, olck, dlmlock, dlmrc);
-                        }
-                        unlock_res_and_lock(dlmlock);
+       switch (flag) {
+       case LDLM_CB_BLOCKING: {
+               struct lustre_handle lockh;
 
 
-                        if (dlmrc != 0) {
-                                CL_LOCK_DEBUG(D_ERROR, env, lock,
-                                              "dlmlock returned %d\n", dlmrc);
-                                cl_lock_error(env, lock, dlmrc);
-                        }
-                        cl_lock_mutex_put(env, lock);
-                        osc_ast_data_put(env, olck);
-                        result = 0;
-                } else
-                        result = -ELDLM_NO_LOCK_DATA;
-                cl_env_nested_put(&nest, env);
-        } else
-                result = PTR_ERR(env);
-        return dlmrc ?: result;
+               ldlm_lock2handle(dlmlock, &lockh);
+               result = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+               if (result == -ENODATA)
+                       result = 0;
+               break;
+       }
+       case LDLM_CB_CANCELING: {
+               struct lu_env     *env;
+               struct cl_env_nest nest;
+
+               /*
+                * This can be called in the context of outer IO, e.g.,
+                *
+                *    osc_enqueue_base()->...
+                *      ->ldlm_prep_elc_req()->...
+                *        ->ldlm_cancel_callback()->...
+                *          ->osc_ldlm_blocking_ast()
+                *
+                * new environment has to be created to not corrupt outer
+                * context.
+                */
+               env = cl_env_nested_get(&nest);
+               if (IS_ERR(env)) {
+                       result = PTR_ERR(env);
+                       break;
+               }
+
+               result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
+               cl_env_nested_put(&nest, env);
+               break;
+       }
+       default:
+               LBUG();
+       }
+       RETURN(result);
 }
 
 static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 {
-        struct ptlrpc_request  *req  = data;
-        struct osc_lock        *olck;
-        struct cl_lock         *lock;
-        struct cl_object       *obj;
-        struct cl_env_nest      nest;
-        struct lu_env          *env;
-        struct ost_lvb         *lvb;
-        struct req_capsule     *cap;
-        int                     result;
-
-        LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
-
-        env = cl_env_nested_get(&nest);
-        if (!IS_ERR(env)) {
-                /* osc_ast_data_get() has to go after environment is
-                 * allocated, because osc_ast_data() acquires a
-                 * reference to a lock, and it can only be released in
-                 * environment.
-                 */
-                olck = osc_ast_data_get(dlmlock);
-                if (olck != NULL) {
-                        lock = olck->ols_cl.cls_lock;
+       struct ptlrpc_request   *req  = data;
+       struct cl_env_nest      nest;
+       struct lu_env           *env;
+       struct ost_lvb          *lvb;
+       struct req_capsule      *cap;
+       int                     result;
+
+       ENTRY;
+
+       LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
+
+       env = cl_env_nested_get(&nest);
+       if (!IS_ERR(env)) {
+               struct cl_object *obj = NULL;
+
+               lock_res_and_lock(dlmlock);
+               if (dlmlock->l_ast_data != NULL) {
+                       obj = osc2cl(dlmlock->l_ast_data);
+                       cl_object_get(obj);
+               }
+               unlock_res_and_lock(dlmlock);
+
+               if (obj != NULL) {
                         /* Do not grab the mutex of cl_lock for glimpse.
                          * See LU-1274 for details.
                          * BTW, it's okay for cl_lock to be cancelled during
@@ -877,7 +615,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
                         result = req_capsule_server_pack(cap);
                         if (result == 0) {
                                 lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
-                                obj = lock->cll_descr.cld_obj;
                                 result = cl_object_glimpse(env, obj, lvb);
                         }
                        if (!exp_connect_lvb_type(req->rq_export))
@@ -885,7 +622,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
                                                   &RMF_DLM_LVB,
                                                   sizeof(struct ost_lvb_v1),
                                                   RCL_SERVER);
-                        osc_ast_data_put(env, olck);
+                       cl_object_put(env, obj);
                 } else {
                         /*
                          * These errors are normal races, so we don't want to
@@ -896,10 +633,10 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
                         result = -ELDLM_NO_LOCK_DATA;
                 }
                 cl_env_nested_put(&nest, env);
-        } else
-                result = PTR_ERR(env);
-        req->rq_status = result;
-        return result;
+       } else
+               result = PTR_ERR(env);
+       req->rq_status = result;
+       RETURN(result);
 }
 
 static int weigh_cb(const struct lu_env *env, struct cl_io *io,
@@ -918,24 +655,25 @@ static int weigh_cb(const struct lu_env *env, struct cl_io *io,
 }
 
 static unsigned long osc_lock_weight(const struct lu_env *env,
-                                    const struct osc_lock *ols)
+                                    struct osc_object *oscobj,
+                                    struct ldlm_extent *extent)
 {
-       struct cl_io *io = &osc_env_info(env)->oti_io;
-       struct cl_lock_descr *descr = &ols->ols_cl.cls_lock->cll_descr;
-       struct cl_object *obj = ols->ols_cl.cls_obj;
-       unsigned long npages = 0;
-       int result;
+       struct cl_io     *io = &osc_env_info(env)->oti_io;
+       struct cl_object *obj = cl_object_top(&oscobj->oo_cl);
+       unsigned long    npages = 0;
+       int              result;
        ENTRY;
 
-       io->ci_obj = cl_object_top(obj);
+       io->ci_obj = obj;
        io->ci_ignore_layout = 1;
        result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (result != 0)
                RETURN(result);
 
        do {
-               result = osc_page_gang_lookup(env, io, cl2osc(obj),
-                                             descr->cld_start, descr->cld_end,
+               result = osc_page_gang_lookup(env, io, oscobj,
+                                             cl_index(obj, extent->start),
+                                             cl_index(obj, extent->end),
                                              weigh_cb, (void *)&npages);
                if (result == CLP_GANG_ABORT)
                        break;
@@ -954,8 +692,10 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
 {
        struct cl_env_nest       nest;
        struct lu_env           *env;
-       struct osc_lock         *lock;
+       struct osc_object       *obj;
+       struct osc_lock         *oscl;
        unsigned long            weight;
+       bool                    found = false;
        ENTRY;
 
        might_sleep();
@@ -972,17 +712,25 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
                RETURN(1);
 
        LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
-       lock = osc_ast_data_get(dlmlock);
-       if (lock == NULL) {
-               /* cl_lock was destroyed because of memory pressure.
-                * It is much reasonable to assign this type of lock
-                * a lower cost.
+       obj = dlmlock->l_ast_data;
+       if (obj == NULL)
+               GOTO(out, weight = 1);
+
+       spin_lock(&obj->oo_ol_spin);
+       list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) {
+               if (oscl->ols_dlmlock != NULL && oscl->ols_dlmlock != dlmlock)
+                       continue;
+               found = true;
+       }
+       spin_unlock(&obj->oo_ol_spin);
+       if (found) {
+               /*
+                * If the lock is being used by an IO, do not cancel it.
                 */
-               GOTO(out, weight = 0);
+               GOTO(out, weight = 1);
        }
 
-       weight = osc_lock_weight(env, lock);
-       osc_ast_data_put(env, lock);
+       weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
        EXIT;
 
 out:
@@ -991,27 +739,16 @@ out:
 }
 
 static void osc_lock_build_einfo(const struct lu_env *env,
-                                 const struct cl_lock *clock,
-                                 struct osc_lock *lock,
-                                 struct ldlm_enqueue_info *einfo)
+                                const struct cl_lock *lock,
+                                struct osc_object *osc,
+                                struct ldlm_enqueue_info *einfo)
 {
-        enum cl_lock_mode mode;
-
-        mode = clock->cll_descr.cld_mode;
-        if (mode == CLM_PHANTOM)
-                /*
-                 * For now, enqueue all glimpse locks in read mode. In the
-                 * future, client might choose to enqueue LCK_PW lock for
-                 * glimpse on a file opened for write.
-                 */
-                mode = CLM_READ;
-
-        einfo->ei_type   = LDLM_EXTENT;
-        einfo->ei_mode   = osc_cl_lock2ldlm(mode);
-        einfo->ei_cb_bl  = osc_ldlm_blocking_ast;
-        einfo->ei_cb_cp  = osc_ldlm_completion_ast;
-        einfo->ei_cb_gl  = osc_ldlm_glimpse_ast;
-        einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
+       einfo->ei_type   = LDLM_EXTENT;
+       einfo->ei_mode   = osc_cl_lock2ldlm(lock->cll_descr.cld_mode);
+       einfo->ei_cb_bl  = osc_ldlm_blocking_ast;
+       einfo->ei_cb_cp  = ldlm_completion_ast;
+       einfo->ei_cb_gl  = osc_ldlm_glimpse_ast;
+       einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
 }
 
 /**
@@ -1067,114 +804,100 @@ static void osc_lock_to_lockless(const struct lu_env *env,
         LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
 }
 
-static int osc_lock_compatible(const struct osc_lock *qing,
-                               const struct osc_lock *qed)
+static bool osc_lock_compatible(const struct osc_lock *qing,
+                               const struct osc_lock *qed)
 {
 {
-        enum cl_lock_mode qing_mode;
-        enum cl_lock_mode qed_mode;
+       struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
+       struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
+
+       if (qed->ols_glimpse)
+               return true;
+
+       if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
+               return true;
+
+       if (qed->ols_state < OLS_GRANTED)
+               return true;
 
-        qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
-        if (qed->ols_glimpse &&
-            (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
-                return 1;
+       if (qed_descr->cld_mode  >= qing_descr->cld_mode &&
+           qed_descr->cld_start <= qing_descr->cld_start &&
+           qed_descr->cld_end   >= qing_descr->cld_end)
+               return true;
 
-        qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
-        return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
+       return false;
 }
 
-/**
- * Cancel all conflicting locks and wait for them to be destroyed.
- *
- * This function is used for two purposes:
- *
- *     - early cancel all conflicting locks before starting IO, and
- *
- *     - guarantee that pages added to the page cache by lockless IO are never
- *       covered by locks other than lockless IO lock, and, hence, are not
- *       visible to other threads.
- */
-static int osc_lock_enqueue_wait(const struct lu_env *env,
-                                 const struct osc_lock *olck)
+static void osc_lock_wake_waiters(const struct lu_env *env,
+                                 struct osc_object *osc,
+                                 struct osc_lock *oscl)
 {
-        struct cl_lock          *lock    = olck->ols_cl.cls_lock;
-        struct cl_lock_descr    *descr   = &lock->cll_descr;
-        struct cl_object_header *hdr     = cl_object_header(descr->cld_obj);
-        struct cl_lock          *scan;
-        struct cl_lock          *conflict= NULL;
-        int lockless                     = osc_lock_is_lockless(olck);
-        int rc                           = 0;
-        ENTRY;
+       spin_lock(&osc->oo_ol_spin);
+       list_del_init(&oscl->ols_nextlock_oscobj);
+       spin_unlock(&osc->oo_ol_spin);
 
-        LASSERT(cl_lock_is_mutexed(lock));
+       spin_lock(&oscl->ols_lock);
+       while (!list_empty(&oscl->ols_waiting_list)) {
+               struct osc_lock *scan;
 
 
-        /* make it enqueue anyway for glimpse lock, because we actually
-         * don't need to cancel any conflicting locks. */
-        if (olck->ols_glimpse)
-                return 0;
+               scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock,
+                                 ols_wait_entry);
+               list_del_init(&scan->ols_wait_entry);
 
-       spin_lock(&hdr->coh_lock_guard);
-       list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
-                struct cl_lock_descr *cld = &scan->cll_descr;
-                const struct osc_lock *scan_ols;
+               cl_sync_io_note(env, scan->ols_owner, 0);
+       }
+       spin_unlock(&oscl->ols_lock);
+}
 
-                if (scan == lock)
-                        break;
+static void osc_lock_enqueue_wait(const struct lu_env *env,
+                                 struct osc_object *obj,
+                                 struct osc_lock *oscl)
+{
+       struct osc_lock         *tmp_oscl;
+       struct cl_lock_descr    *need = &oscl->ols_cl.cls_lock->cll_descr;
+       struct cl_sync_io       *waiter = &osc_env_info(env)->oti_anchor;
 
-                if (scan->cll_state < CLS_QUEUING ||
-                    scan->cll_state == CLS_FREEING ||
-                    cld->cld_start > descr->cld_end ||
-                    cld->cld_end < descr->cld_start)
-                        continue;
+       spin_lock(&obj->oo_ol_spin);
+       list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
 
-                /* overlapped and living locks. */
+restart:
+       list_for_each_entry(tmp_oscl, &obj->oo_ol_list,
+                           ols_nextlock_oscobj) {
+               struct cl_lock_descr *descr;
 
-                /* We're not supposed to give up group lock. */
-                if (scan->cll_descr.cld_mode == CLM_GROUP) {
-                        LASSERT(descr->cld_mode != CLM_GROUP ||
-                                descr->cld_gid != scan->cll_descr.cld_gid);
-                        continue;
-                }
+               if (tmp_oscl == oscl)
+                       break;
 
-                scan_ols = osc_lock_at(scan);
+               descr = &tmp_oscl->ols_cl.cls_lock->cll_descr;
+               if (descr->cld_start > need->cld_end ||
+                   descr->cld_end   < need->cld_start)
+                       continue;
 
-                /* We need to cancel the compatible locks if we're enqueuing
-                 * a lockless lock, for example:
-                 * imagine that client has PR lock on [0, 1000], and thread T0
-                 * is doing lockless IO in [500, 1500] region. Concurrent
-                 * thread T1 can see lockless data in [500, 1000], which is
-                 * wrong, because these data are possibly stale. */
-                if (!lockless && osc_lock_compatible(olck, scan_ols))
-                        continue;
+               /* We're not supposed to give up group lock */
+               if (descr->cld_mode == CLM_GROUP)
+                       break;
 
-                cl_lock_get_trust(scan);
-                conflict = scan;
-                break;
-        }
-       spin_unlock(&hdr->coh_lock_guard);
+               if (!osc_lock_is_lockless(oscl) &&
+                   osc_lock_compatible(oscl, tmp_oscl))
+                       continue;
 
-        if (conflict) {
-                if (lock->cll_descr.cld_mode == CLM_GROUP) {
-                        /* we want a group lock but a previous lock request
-                         * conflicts, we do not wait but return 0 so the
-                         * request is send to the server
-                         */
-                        CDEBUG(D_DLMTRACE, "group lock %p is conflicted "
-                                           "with %p, no wait, send to server\n",
-                               lock, conflict);
-                        cl_lock_put(env, conflict);
-                        rc = 0;
-                } else {
-                        CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, "
-                                           "will wait\n",
-                               lock, conflict);
-                        LASSERT(lock->cll_conflict == NULL);
-                        lu_ref_add(&conflict->cll_reference, "cancel-wait",
-                                   lock);
-                        lock->cll_conflict = conflict;
-                        rc = CLO_WAIT;
-                }
-        }
-        RETURN(rc);
+               /* wait for conflicting lock to be canceled */
+               cl_sync_io_init(waiter, 1, cl_sync_io_end);
+               oscl->ols_owner = waiter;
+
+               spin_lock(&tmp_oscl->ols_lock);
+               /* add oscl into tmp's ols_waiting list */
+               list_add_tail(&oscl->ols_wait_entry,
+                             &tmp_oscl->ols_waiting_list);
+               spin_unlock(&tmp_oscl->ols_lock);
+
+               spin_unlock(&obj->oo_ol_spin);
+               (void)cl_sync_io_wait(env, waiter, 0);
+
+               spin_lock(&obj->oo_ol_spin);
+               oscl->ols_owner = NULL;
+               goto restart;
+       }
+       spin_unlock(&obj->oo_ol_spin);
 }
 
 /**
@@ -1192,190 +915,125 @@ static int osc_lock_enqueue_wait(const struct lu_env *env,
  * This function does not wait for the network communication to complete.
  */
 static int osc_lock_enqueue(const struct lu_env *env,
-                            const struct cl_lock_slice *slice,
-                            struct cl_io *unused, __u32 enqflags)
+                           const struct cl_lock_slice *slice,
+                           struct cl_io *unused, struct cl_sync_io *anchor)
 {
 {
-        struct osc_lock          *ols     = cl2osc_lock(slice);
-        struct cl_lock           *lock    = ols->ols_cl.cls_lock;
-        int result;
+       struct osc_thread_info          *info  = osc_env_info(env);
+       struct osc_io                   *oio   = osc_env_io(env);
+       struct osc_object               *osc   = cl2osc(slice->cls_obj);
+       struct osc_lock                 *oscl  = cl2osc_lock(slice);
+       struct cl_lock                  *lock  = slice->cls_lock;
+       struct ldlm_res_id              *resname = &info->oti_resname;
+       ldlm_policy_data_t              *policy  = &info->oti_policy;
+       osc_enqueue_upcall_f            upcall   = osc_lock_upcall;
+       void                            *cookie  = oscl;
+       bool                            async    = false;
+       int                             result;
+
         ENTRY;
 
         ENTRY;
 
-        LASSERT(cl_lock_is_mutexed(lock));
-        LASSERTF(ols->ols_state == OLS_NEW,
-                 "Impossible state: %d\n", ols->ols_state);
-
-       LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
-               "lock = %p, ols = %p\n", lock, ols);
-
-        result = osc_lock_enqueue_wait(env, ols);
-        if (result == 0) {
-                if (!osc_lock_is_lockless(ols)) {
-                        struct osc_object        *obj = cl2osc(slice->cls_obj);
-                        struct osc_thread_info   *info = osc_env_info(env);
-                        struct ldlm_res_id       *resname = &info->oti_resname;
-                        ldlm_policy_data_t       *policy = &info->oti_policy;
-                        struct ldlm_enqueue_info *einfo = &ols->ols_einfo;
-
-                       /* lock will be passed as upcall cookie,
-                        * hold ref to prevent to be released. */
-                        cl_lock_hold_add(env, lock, "upcall", lock);
-                       /* a user for agl lock also */
-                       if (ols->ols_agl)
-                               cl_lock_user_add(env, lock);
-                        ols->ols_state = OLS_ENQUEUED;
+       LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
+               "lock = %p, ols = %p\n", lock, oscl);
 
-                        /*
-                         * XXX: this is possible blocking point as
-                         * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
-                         * LDLM_CP_CALLBACK.
-                         */
-                       ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
-                        osc_lock_build_policy(env, lock, policy);
-                        result = osc_enqueue_base(osc_export(obj), resname,
-                                          &ols->ols_flags, policy,
-                                          &ols->ols_lvb,
-                                          obj->oo_oinfo->loi_kms_valid,
-                                          osc_lock_upcall,
-                                          ols, einfo, &ols->ols_handle,
-                                          PTLRPCD_SET, 1, ols->ols_agl);
-                        if (result != 0) {
-                               if (ols->ols_agl)
-                                       cl_lock_user_del(env, lock);
-                               cl_lock_unhold(env, lock, "upcall", lock);
-                                if (unlikely(result == -ECANCELED)) {
-                                        ols->ols_state = OLS_NEW;
-                                        result = 0;
-                                }
-                        }
-                } else {
-                        ols->ols_state = OLS_GRANTED;
-                        ols->ols_owner = osc_env_io(env);
-                }
-        }
-        LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
-        RETURN(result);
-}
+       if (oscl->ols_state == OLS_GRANTED)
+               RETURN(0);
 
-static int osc_lock_wait(const struct lu_env *env,
-                         const struct cl_lock_slice *slice)
-{
-        struct osc_lock *olck = cl2osc_lock(slice);
-        struct cl_lock  *lock = olck->ols_cl.cls_lock;
-
-        LINVRNT(osc_lock_invariant(olck));
-
-        if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
-                if (olck->ols_flags & LDLM_FL_LVB_READY) {
-                        return 0;
-                } else if (olck->ols_agl) {
-                        if (lock->cll_flags & CLF_FROM_UPCALL)
-                                /* It is from enqueue RPC reply upcall for
-                                 * updating state. Do not re-enqueue. */
-                                return -ENAVAIL;
-                        else
-                                olck->ols_state = OLS_NEW;
-                } else {
-                        LASSERT(lock->cll_error);
-                        return lock->cll_error;
-                }
-        }
+       if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
+               GOTO(enqueue_base, 0);
 
-        if (olck->ols_state == OLS_NEW) {
-                int rc;
-
-                LASSERT(olck->ols_agl);
-               olck->ols_agl = 0;
-               olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT;
-                rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
-                if (rc != 0)
-                        return rc;
-                else
-                        return CLO_REENQUEUED;
-        }
+       if (oscl->ols_glimpse) {
+               LASSERT(equi(oscl->ols_agl, anchor == NULL));
+               async = true;
+               GOTO(enqueue_base, 0);
+       }
+
+       osc_lock_enqueue_wait(env, osc, oscl);
 
-        LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
-                     lock->cll_error == 0, olck->ols_lock != NULL));
+       /* we can grant lockless lock right after all conflicting locks
+        * are canceled. */
+       if (osc_lock_is_lockless(oscl)) {
+               oscl->ols_state = OLS_GRANTED;
+               oio->oi_lockless = 1;
+               RETURN(0);
+       }
 
-        return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
+enqueue_base:
+       oscl->ols_state = OLS_ENQUEUED;
+       if (anchor != NULL) {
+               atomic_inc(&anchor->csi_sync_nr);
+               oscl->ols_owner = anchor;
+       }
+
+       /**
+        * DLM lock's ast data must be osc_object;
+        * if glimpse or AGL lock, async of osc_enqueue_base() must be true,
+        * DLM's enqueue callback set to osc_lock_upcall() with cookie as
+        * osc_lock.
+        */
+       ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+       osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo);
+       osc_lock_build_policy(env, lock, policy);
+       if (oscl->ols_agl) {
+               oscl->ols_einfo.ei_cbdata = NULL;
+               /* hold a reference for callback */
+               cl_object_get(osc2cl(osc));
+               upcall = osc_lock_upcall_agl;
+               cookie = osc;
+       }
+       result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
+                                 policy, &oscl->ols_lvb,
+                                 osc->oo_oinfo->loi_kms_valid,
+                                 upcall, cookie,
+                                 &oscl->ols_einfo, PTLRPCD_SET, async,
+                                 oscl->ols_agl);
+       if (result != 0) {
+               oscl->ols_state = OLS_CANCELLED;
+               osc_lock_wake_waiters(env, osc, oscl);
+
+               /* hide error for AGL lock. */
+               if (oscl->ols_agl) {
+                       cl_object_put(env, osc2cl(osc));
+                       result = 0;
+               }
+
+               if (anchor != NULL)
+                       cl_sync_io_note(env, anchor, result);
+       } else {
+               if (osc_lock_is_lockless(oscl)) {
+                       oio->oi_lockless = 1;
+               } else if (!async) {
+                       LASSERT(oscl->ols_state == OLS_GRANTED);
+                       LASSERT(oscl->ols_hold);
+                       LASSERT(oscl->ols_dlmlock != NULL);
+               }
+       }
+       RETURN(result);
 }
 
 /**
- * An implementation of cl_lock_operations::clo_use() method that pins cached
- * lock.
+ * Breaks a link between osc_lock and dlm_lock.
  */
  */
-static int osc_lock_use(const struct lu_env *env,
-                        const struct cl_lock_slice *slice)
+static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
 {
-        struct osc_lock *olck = cl2osc_lock(slice);
-        int rc;
-
-        LASSERT(!olck->ols_hold);
-
-        /*
-         * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
-         * flag is not set. This protects us from a concurrent blocking ast.
-         */
-        rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
-        if (rc == 0) {
-                olck->ols_hold = 1;
-                olck->ols_state = OLS_GRANTED;
-        } else {
-                struct cl_lock *lock;
-
-                /*
-                 * Lock is being cancelled somewhere within
-                 * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
-                 * set, but osc_ldlm_blocking_ast() hasn't yet acquired
-                 * cl_lock mutex.
-                 */
-                lock = slice->cls_lock;
-                LASSERT(lock->cll_state == CLS_INTRANSIT);
-                LASSERT(lock->cll_users > 0);
-                /* set a flag for osc_dlm_blocking_ast0() to signal the
-                 * lock.*/
-                olck->ols_ast_wait = 1;
-                rc = CLO_WAIT;
-        }
-        return rc;
-}
+       struct ldlm_lock *dlmlock;
 
-static int osc_lock_flush(struct osc_lock *ols, int discard)
-{
-       struct cl_lock       *lock  = ols->ols_cl.cls_lock;
-       struct cl_env_nest    nest;
-       struct lu_env        *env;
-       int result = 0;
-       ENTRY;
+       dlmlock = olck->ols_dlmlock;
+       if (dlmlock == NULL)
+               return;
 
-       env = cl_env_nested_get(&nest);
-       if (!IS_ERR(env)) {
-               struct osc_object    *obj   = cl2osc(ols->ols_cl.cls_obj);
-               struct cl_lock_descr *descr = &lock->cll_descr;
-               int rc = 0;
-
-               if (descr->cld_mode >= CLM_WRITE) {
-                       result = osc_cache_writeback_range(env, obj,
-                                       descr->cld_start, descr->cld_end,
-                                       1, discard);
-                       LDLM_DEBUG(ols->ols_lock,
-                               "lock %p: %d pages were %s.\n", lock, result,
-                               discard ? "discarded" : "written");
-                       if (result > 0)
-                               result = 0;
-               }
+       if (olck->ols_hold) {
+               olck->ols_hold = 0;
+               osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+               olck->ols_handle.cookie = 0ULL;
+       }
 
-               rc = osc_lock_discard_pages(env, ols);
-               if (result == 0 && rc < 0)
-                       result = rc;
+       olck->ols_dlmlock = NULL;
 
-                cl_env_nested_put(&nest, env);
-        } else
-                result = PTR_ERR(env);
-        if (result == 0) {
-                ols->ols_flush = 1;
-                LINVRNT(!osc_lock_has_pages(ols));
-        }
-       RETURN(result);
+       /* release a reference taken in osc_lock_upcall(). */
+       LASSERT(olck->ols_has_ref);
+       lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
+       LDLM_LOCK_RELEASE(dlmlock);
+       olck->ols_has_ref = 0;
 }
 
 /**
@@ -1395,167 +1053,19 @@ static int osc_lock_flush(struct osc_lock *ols, int discard)
 static void osc_lock_cancel(const struct lu_env *env,
                             const struct cl_lock_slice *slice)
 {
-       struct cl_lock   *lock    = slice->cls_lock;
-       struct osc_lock  *olck    = cl2osc_lock(slice);
-       struct ldlm_lock *dlmlock = olck->ols_lock;
-
-       LASSERT(cl_lock_is_mutexed(lock));
-       LINVRNT(osc_lock_invariant(olck));
-
-       if (dlmlock != NULL) {
-               bool do_cancel;
-               int  result = 0;
-
-               if (olck->ols_state >= OLS_GRANTED)
-                       result = osc_lock_flush(olck,
-                               ldlm_is_discard_data(dlmlock));
-               osc_lock_unhold(olck);
-
-               lock_res_and_lock(dlmlock);
-               /* Now that we're the only user of dlm read/write reference,
-                * mostly the ->l_readers + ->l_writers should be zero.
-                * However, there is a corner case.
-                * See b=18829 for details.*/
-               do_cancel = (dlmlock->l_readers == 0 &&
-                            dlmlock->l_writers == 0);
-               ldlm_set_cbpending(dlmlock);
-               unlock_res_and_lock(dlmlock);
-               if (do_cancel)
-                       result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
-               if (result < 0)
-                       CL_LOCK_DEBUG(D_ERROR, env, lock,
-                                     "lock %p cancel failure with error(%d)\n",
-                                     lock, result);
-       }
-       olck->ols_state = OLS_CANCELLED;
-       olck->ols_flags &= ~LDLM_FL_LVB_READY;
-       osc_lock_detach(env, olck);
-}
-
-#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
-static int check_cb(const struct lu_env *env, struct cl_io *io,
-                   struct osc_page *ops, void *cbdata)
-{
-       struct cl_lock *lock = cbdata;
-
-       if (lock->cll_descr.cld_mode == CLM_READ) {
-               struct cl_lock *tmp;
-               tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj,
-                                      osc_index(ops), lock, 1, 0);
-               if (tmp != NULL) {
-                       cl_lock_put(env, tmp);
-                       return CLP_GANG_OKAY;
-               }
-       }
-
-       CL_LOCK_DEBUG(D_ERROR, env, lock, "still has pages\n");
-       CL_PAGE_DEBUG(D_ERROR, env, ops->ops_cl.cpl_page, "\n");
-       return CLP_GANG_ABORT;
-}
-
-/**
- * Returns true iff there are pages under \a olck not protected by other
- * locks.
- */
-static bool osc_lock_has_pages(struct osc_lock *olck)
-{
-        struct cl_lock       *lock;
-        struct cl_lock_descr *descr;
-        struct cl_object     *obj;
-        struct osc_object    *oob;
-        struct cl_env_nest    nest;
-        struct cl_io         *io;
-        struct lu_env        *env;
-       bool                     has_pages;
-       int                      rc;
-
-       env = cl_env_nested_get(&nest);
-       if (IS_ERR(env))
-               return false;
-
-        obj   = olck->ols_cl.cls_obj;
-        oob   = cl2osc(obj);
-        io    = &oob->oo_debug_io;
-        lock  = olck->ols_cl.cls_lock;
-        descr = &lock->cll_descr;
-
-       mutex_lock(&oob->oo_debug_mutex);
-       io->ci_obj = cl_object_top(obj);
-       io->ci_ignore_layout = 1;
-       rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-       if (rc != 0)
-               GOTO(out, has_pages = false);
-
-       do {
-               rc = osc_page_gang_lookup(env, io, oob,
-                                         descr->cld_start, descr->cld_end,
-                                         check_cb, (void *)lock);
-               if (rc == CLP_GANG_ABORT)
-                       break;
-               if (rc == CLP_GANG_RESCHED)
-                       cond_resched();
-       } while (rc != CLP_GANG_OKAY);
-       has_pages = (rc == CLP_GANG_ABORT);
-out:
-       cl_io_fini(env, io);
-       mutex_unlock(&oob->oo_debug_mutex);
-       cl_env_nested_put(&nest, env);
-
-       return has_pages;
-}
-#else
-static bool osc_lock_has_pages(struct osc_lock *olck)
-{
-       return false;
-}
-#endif /* CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
-
-static void osc_lock_delete(const struct lu_env *env,
-                            const struct cl_lock_slice *slice)
-{
-        struct osc_lock *olck;
+       struct osc_object *obj  = cl2osc(slice->cls_obj);
+       struct osc_lock   *oscl = cl2osc_lock(slice);
 
-        olck = cl2osc_lock(slice);
-        if (olck->ols_glimpse) {
-                LASSERT(!olck->ols_hold);
-                LASSERT(!olck->ols_lock);
-                return;
-        }
+       ENTRY;
 
-        LINVRNT(osc_lock_invariant(olck));
-        LINVRNT(!osc_lock_has_pages(olck));
+       LINVRNT(osc_lock_invariant(oscl));
 
-        osc_lock_unhold(olck);
-        osc_lock_detach(env, olck);
-}
+       osc_lock_detach(env, oscl);
+       oscl->ols_state = OLS_CANCELLED;
+       oscl->ols_flags &= ~LDLM_FL_LVB_READY;
 
-/**
- * Implements cl_lock_operations::clo_state() method for osc layer.
- *
- * Maintains osc_lock::ols_owner field.
- *
- * This assumes that lock always enters CLS_HELD (from some other state) in
- * the same IO context as one that requested the lock. This should not be a
- * problem, because context is by definition shared by all activity pertaining
- * to the same high-level IO.
- */
-static void osc_lock_state(const struct lu_env *env,
-                           const struct cl_lock_slice *slice,
-                           enum cl_lock_state state)
-{
-        struct osc_lock *lock = cl2osc_lock(slice);
-
-        /*
-         * XXX multiple io contexts can use the lock at the same time.
-         */
-        LINVRNT(osc_lock_invariant(lock));
-        if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
-                struct osc_io *oio = osc_env_io(env);
-
-                LASSERT(lock->ols_owner == NULL);
-                lock->ols_owner = oio;
-        } else if (state != CLS_HELD)
-                lock->ols_owner = NULL;
+       osc_lock_wake_waiters(env, obj, oscl);
+       EXIT;
 }
 
 static int osc_lock_print(const struct lu_env *env, void *cookie,
@@ -1563,196 +1073,162 @@ static int osc_lock_print(const struct lu_env *env, void *cookie,
 {
        struct osc_lock *lock = cl2osc_lock(slice);
 
-       /*
-        * XXX print ldlm lock and einfo properly.
-        */
        (*p)(env, cookie, "%p "LPX64" "LPX64" %d %p ",
-            lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
+            lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
             lock->ols_state, lock->ols_owner);
        osc_lvb_print(env, cookie, p, &lock->ols_lvb);
        return 0;
 }
 
-static int osc_lock_fits_into(const struct lu_env *env,
-                              const struct cl_lock_slice *slice,
-                              const struct cl_lock_descr *need,
-                              const struct cl_io *io)
-{
-        struct osc_lock *ols = cl2osc_lock(slice);
-
-        if (need->cld_enq_flags & CEF_NEVER)
-                return 0;
-
-       if (ols->ols_state >= OLS_CANCELLED)
-               return 0;
-
-        if (need->cld_mode == CLM_PHANTOM) {
-                if (ols->ols_agl)
-                        return !(ols->ols_state > OLS_RELEASED);
-
-                /*
-                 * Note: the QUEUED lock can't be matched here, otherwise
-                 * it might cause the deadlocks.
-                 * In read_process,
-                 * P1: enqueued read lock, create sublock1
-                 * P2: enqueued write lock, create sublock2(conflicted
-                 *     with sublock1).
-                 * P1: Grant read lock.
-                 * P1: enqueued glimpse lock(with holding sublock1_read),
-                 *     matched with sublock2, waiting sublock2 to be granted.
-                 *     But sublock2 can not be granted, because P1
-                 *     will not release sublock1. Bang!
-                 */
-                if (ols->ols_state < OLS_GRANTED ||
-                    ols->ols_state > OLS_RELEASED)
-                        return 0;
-        } else if (need->cld_enq_flags & CEF_MUST) {
-                /*
-                 * If the lock hasn't ever enqueued, it can't be matched
-                 * because enqueue process brings in many information
-                 * which can be used to determine things such as lockless,
-                 * CEF_MUST, etc.
-                 */
-                if (ols->ols_state < OLS_UPCALL_RECEIVED &&
-                    ols->ols_locklessable)
-                        return 0;
-        }
-        return 1;
-}
-
 static const struct cl_lock_operations osc_lock_ops = {
         .clo_fini    = osc_lock_fini,
         .clo_enqueue = osc_lock_enqueue,
-        .clo_wait    = osc_lock_wait,
-        .clo_unuse   = osc_lock_unuse,
-        .clo_use     = osc_lock_use,
-        .clo_delete  = osc_lock_delete,
-        .clo_state   = osc_lock_state,
         .clo_cancel  = osc_lock_cancel,
         .clo_print   = osc_lock_print,
-        .clo_fits_into = osc_lock_fits_into,
 };
 
-static int osc_lock_lockless_unuse(const struct lu_env *env,
-                                   const struct cl_lock_slice *slice)
-{
-        struct osc_lock *ols = cl2osc_lock(slice);
-        struct cl_lock *lock = slice->cls_lock;
-
-        LASSERT(ols->ols_state == OLS_GRANTED);
-        LINVRNT(osc_lock_invariant(ols));
-
-        cl_lock_cancel(env, lock);
-        cl_lock_delete(env, lock);
-        return 0;
-}
-
 static void osc_lock_lockless_cancel(const struct lu_env *env,
-                                     const struct cl_lock_slice *slice)
+                                    const struct cl_lock_slice *slice)
 {
 {
-        struct osc_lock   *ols  = cl2osc_lock(slice);
-        int result;
+       struct osc_lock      *ols   = cl2osc_lock(slice);
+       struct osc_object    *osc   = cl2osc(slice->cls_obj);
+       struct cl_lock_descr *descr = &slice->cls_lock->cll_descr;
+       int result;
 
 
+       LASSERT(ols->ols_dlmlock == NULL);
+       result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
+                               descr->cld_mode, 0);
         if (result)
                 CERROR("Pages for lockless lock %p were not purged(%d)\n",
                        ols, result);
-        ols->ols_state = OLS_CANCELLED;
-}
-
-static int osc_lock_lockless_wait(const struct lu_env *env,
-                                  const struct cl_lock_slice *slice)
-{
-        struct osc_lock *olck = cl2osc_lock(slice);
-        struct cl_lock  *lock = olck->ols_cl.cls_lock;
-
-        LINVRNT(osc_lock_invariant(olck));
-        LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);
-
-        return lock->cll_error;
-}
-
-static void osc_lock_lockless_state(const struct lu_env *env,
-                                    const struct cl_lock_slice *slice,
-                                    enum cl_lock_state state)
-{
-        struct osc_lock *lock = cl2osc_lock(slice);
-
-        LINVRNT(osc_lock_invariant(lock));
-        if (state == CLS_HELD) {
-                struct osc_io *oio  = osc_env_io(env);
-
-                LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
-                lock->ols_owner = oio;
 
-                /* set the io to be lockless if this lock is for io's
-                 * host object */
-                if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
-                        oio->oi_lockless = 1;
-        }
-}
-
-static int osc_lock_lockless_fits_into(const struct lu_env *env,
-                                       const struct cl_lock_slice *slice,
-                                       const struct cl_lock_descr *need,
-                                       const struct cl_io *io)
-{
-        struct osc_lock *lock = cl2osc_lock(slice);
-
-        if (!(need->cld_enq_flags & CEF_NEVER))
-                return 0;
-
-        /* lockless lock should only be used by its owning io. b22147 */
-        return (lock->ols_owner == osc_env_io(env));
+       osc_lock_wake_waiters(env, osc, ols);
 }
 
 static const struct cl_lock_operations osc_lock_lockless_ops = {
         .clo_fini      = osc_lock_fini,
         .clo_enqueue   = osc_lock_enqueue,
-        .clo_wait      = osc_lock_lockless_wait,
-        .clo_unuse     = osc_lock_lockless_unuse,
-        .clo_state     = osc_lock_lockless_state,
-        .clo_fits_into = osc_lock_lockless_fits_into,
         .clo_cancel    = osc_lock_lockless_cancel,
         .clo_print     = osc_lock_print
 };
 
+static void osc_lock_set_writer(const struct lu_env *env,
+                               const struct cl_io *io,
+                               struct cl_object *obj, struct osc_lock *oscl)
+{
+       struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
+       pgoff_t io_start;
+       pgoff_t io_end;
+
+       if (!cl_object_same(io->ci_obj, obj))
+               return;
+
+       if (likely(io->ci_type == CIT_WRITE)) {
+               io_start = cl_index(obj, io->u.ci_rw.crw_pos);
+               io_end = cl_index(obj, io->u.ci_rw.crw_pos +
+                                               io->u.ci_rw.crw_count - 1);
+               if (cl_io_is_append(io)) {
+                       io_start = 0;
+                       io_end = CL_PAGE_EOF;
+               }
+       } else {
+               LASSERT(cl_io_is_mkwrite(io));
+               io_start = io_end = io->u.ci_fault.ft_index;
+       }
+
+       if (descr->cld_mode >= CLM_WRITE &&
+           descr->cld_start <= io_start && descr->cld_end >= io_end) {
+               struct osc_io *oio = osc_env_io(env);
+
+               /* There must be only one lock to match the write region */
+               LASSERT(oio->oi_write_osclock == NULL);
+               oio->oi_write_osclock = oscl;
+       }
+}
+
 int osc_lock_init(const struct lu_env *env,
                  struct cl_object *obj, struct cl_lock *lock,
 int osc_lock_init(const struct lu_env *env,
                  struct cl_object *obj, struct cl_lock *lock,
-                 const struct cl_io *unused)
+                 const struct cl_io *io)
 {
 {
-       struct osc_lock *clk;
-       int result;
+       struct osc_lock *oscl;
+       __u32 enqflags = lock->cll_descr.cld_enq_flags;
+
+       OBD_SLAB_ALLOC_PTR_GFP(oscl, osc_lock_kmem, GFP_NOFS);
+       if (oscl == NULL)
+               return -ENOMEM;
+
+       oscl->ols_state = OLS_NEW;
+       spin_lock_init(&oscl->ols_lock);
+       INIT_LIST_HEAD(&oscl->ols_waiting_list);
+       INIT_LIST_HEAD(&oscl->ols_wait_entry);
+       INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
+
+       oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
+       oscl->ols_agl = !!(enqflags & CEF_AGL);
+       if (oscl->ols_agl)
+               oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+       if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
+               oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
+               oscl->ols_glimpse = 1;
+       }
 
-       OBD_SLAB_ALLOC_PTR_GFP(clk, osc_lock_kmem, GFP_NOFS);
-       if (clk != NULL) {
-               __u32 enqflags = lock->cll_descr.cld_enq_flags;
+       cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
 
-               osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
-               clk->ols_state = OLS_NEW;
+       if (!(enqflags & CEF_MUST))
+               /* try to convert this lock to a lockless lock */
+               osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER));
+       if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
+               oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
 
-               clk->ols_flags = osc_enq2ldlm_flags(enqflags);
-               clk->ols_agl = !!(enqflags & CEF_AGL);
-               if (clk->ols_agl)
-                       clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
-               if (clk->ols_flags & LDLM_FL_HAS_INTENT)
-                       clk->ols_glimpse = 1;
+       if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
+               osc_lock_set_writer(env, io, obj, oscl);
 
-               cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);
+       LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags "LPX64"\n",
+                         lock, oscl, oscl->ols_flags);
 
-               if (!(enqflags & CEF_MUST))
-                       /* try to convert this lock to a lockless lock */
-                       osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
-               if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
-                       clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
+       return 0;
+}
 
-               LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags "LPX64,
-                               lock, clk, clk->ols_flags);
+/**
+ * Finds an existing lock covering given index and optionally different from a
+ * given \a except lock.
+ */
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+                                      struct osc_object *obj, pgoff_t index,
+                                      int pending, int canceling)
+{
+       struct osc_thread_info *info = osc_env_info(env);
+       struct ldlm_res_id     *resname = &info->oti_resname;
+       ldlm_policy_data_t     *policy  = &info->oti_policy;
+       struct lustre_handle   lockh;
+       struct ldlm_lock       *lock = NULL;
+       ldlm_mode_t            mode;
+       __u64                  flags;
 
-               result = 0;
-       } else
-               result = -ENOMEM;
-       return result;
-}
+       ENTRY;
+
+       ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
+       osc_index2policy(policy, osc2cl(obj), index, index);
+       policy->l_extent.gid = LDLM_GID_ANY;
 
+       flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+       if (pending)
+               flags |= LDLM_FL_CBPENDING;
+       /*
+        * It is fine to match any group lock since there could be only one
+        * with a unique gid and it conflicts with all other lock modes too
+        */
+again:
+       mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
+                              flags, resname, LDLM_EXTENT, policy,
+                              LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling);
+       if (mode != 0) {
+               lock = ldlm_handle2lock(&lockh);
+               /* RACE: the lock is cancelled so let's try again */
+               if (unlikely(lock == NULL))
+                       goto again;
+       }
+
+       RETURN(lock);
+}
 /** @} osc */
index 29eaaf8..e1e4fdb 100644 (file)
@@ -99,6 +99,8 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
        atomic_set(&osc->oo_nr_writes, 0);
        spin_lock_init(&osc->oo_lock);
        spin_lock_init(&osc->oo_tree_lock);
+       spin_lock_init(&osc->oo_ol_spin);
+       INIT_LIST_HEAD(&osc->oo_ol_list);
 
        cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
 
@@ -125,6 +127,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
        LASSERT(list_empty(&osc->oo_reading_exts));
        LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
        LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
+       LASSERT(list_empty(&osc->oo_ol_list));
 
        lu_object_fini(obj);
        OBD_SLAB_FREE_PTR(osc, osc_object_kmem);
@@ -202,6 +205,32 @@ static int osc_object_glimpse(const struct lu_env *env,
         RETURN(0);
 }
 
+static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
+{
+       ENTRY;
+
+       LASSERT(lock->l_granted_mode == lock->l_req_mode);
+       if (lock->l_ast_data == data)
+               lock->l_ast_data = NULL;
+       RETURN(LDLM_ITER_CONTINUE);
+}
+
+static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+       struct osc_object       *osc = cl2osc(obj);
+       struct ldlm_res_id      *resname = &osc_env_info(env)->oti_resname;
+
+       LASSERTF(osc->oo_npages == 0,
+                DFID "still have %lu pages, obj: %p, osc: %p\n",
+                PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc);
+
+       /* DLM locks don't hold a reference on the osc_object, so we have to
+        * clear it before the object is destroyed. */
+       ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+       ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
+                             osc_object_ast_clear, osc);
+       return 0;
+}
 
 void osc_object_set_contended(struct osc_object *obj)
 {
 
 }
 
 static const struct cl_object_operations osc_ops = {
 }
 
 static const struct cl_object_operations osc_ops = {
-        .coo_page_init = osc_page_init,
-        .coo_lock_init = osc_lock_init,
-        .coo_io_init   = osc_io_init,
-        .coo_attr_get  = osc_attr_get,
-        .coo_attr_set  = osc_attr_set,
-        .coo_glimpse   = osc_object_glimpse
+       .coo_page_init = osc_page_init,
+       .coo_lock_init = osc_lock_init,
+       .coo_io_init   = osc_io_init,
+       .coo_attr_get  = osc_attr_get,
+       .coo_attr_set  = osc_attr_set,
+       .coo_glimpse   = osc_object_glimpse,
+       .coo_prune     = osc_object_prune
 };
 
 static const struct lu_object_operations osc_lu_obj_ops = {
-        .loo_object_init      = osc_object_init,
-        .loo_object_delete    = NULL,
-        .loo_object_release   = NULL,
-        .loo_object_free      = osc_object_free,
-        .loo_object_print     = osc_object_print,
-        .loo_object_invariant = NULL
+       .loo_object_init      = osc_object_init,
+       .loo_object_release   = NULL,
+       .loo_object_free      = osc_object_free,
+       .loo_object_print     = osc_object_print,
+       .loo_object_invariant = NULL
 };
 
 struct lu_object *osc_object_alloc(const struct lu_env *env,
index 8d6d69d..e405d4c 100644 (file)
@@ -236,17 +236,17 @@ static int osc_page_is_under_lock(const struct lu_env *env,
                                  const struct cl_page_slice *slice,
                                  struct cl_io *unused, pgoff_t *max_index)
 {
-       struct osc_page *opg = cl2osc_page(slice);
-       struct cl_lock *lock;
-       int             result = -ENODATA;
-       ENTRY;
+       struct osc_page         *opg = cl2osc_page(slice);
+       struct ldlm_lock        *dlmlock;
+       int                     result = -ENODATA;
 
 
-       *max_index = 0;
-       lock = cl_lock_at_pgoff(env, slice->cpl_obj, osc_index(opg),
-                               NULL, 1, 0);
-       if (lock != NULL) {
-               *max_index = lock->cll_descr.cld_end;
-               cl_lock_put(env, lock);
+       ENTRY;
+       dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
+                                      osc_index(opg), 1, 0);
+       if (dlmlock != NULL) {
+               *max_index = cl_index(slice->cpl_obj,
+                                     dlmlock->l_policy_data.l_extent.end);
+               LDLM_LOCK_PUT(dlmlock);
                result = 0;
        }
        RETURN(result);
index e1b4b49..a1e70f0 100644 (file)
@@ -85,14 +85,15 @@ struct osc_fsync_args {
 };
 
 struct osc_enqueue_args {
-       struct obd_export               *oa_exp;
-       __u64                           *oa_flags;
-       obd_enqueue_update_f             oa_upcall;
-       void                            *oa_cookie;
-       struct ost_lvb                  *oa_lvb;
-       struct lustre_handle            *oa_lockh;
-       struct ldlm_enqueue_info        *oa_ei;
-       unsigned int                     oa_agl:1;
+       struct obd_export       *oa_exp;
+       ldlm_type_t             oa_type;
+       ldlm_mode_t             oa_mode;
+       __u64                   *oa_flags;
+       osc_enqueue_upcall_f    oa_upcall;
+       void                    *oa_cookie;
+       struct ost_lvb          *oa_lvb;
+       struct lustre_handle    oa_lockh;
+       unsigned int            oa_agl:1;
 };
 
 static void osc_release_ppga(struct brw_page **ppga, obd_count count);
 };
 
 static void osc_release_ppga(struct brw_page **ppga, obd_count count);
@@ -2103,14 +2104,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
         LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
 
         lock_res_and_lock(lock);
         LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
 
         lock_res_and_lock(lock);
-       spin_lock(&osc_ast_guard);
 
        if (lock->l_ast_data == NULL)
                lock->l_ast_data = data;
        if (lock->l_ast_data == data)
                set = 1;
 
 
        unlock_res_and_lock(lock);
 
        return set;
        unlock_res_and_lock(lock);
 
        return set;
@@ -2162,37 +2161,41 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
         return(rc);
 }
 
         return(rc);
 }
 
-static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
-                            obd_enqueue_update_f upcall, void *cookie,
-                           __u64 *flags, int agl, int rc)
+static int osc_enqueue_fini(struct ptlrpc_request *req,
+                           osc_enqueue_upcall_f upcall, void *cookie,
+                           struct lustre_handle *lockh, ldlm_mode_t mode,
+                           __u64 *flags, int agl, int errcode)
 {
 {
-        int intent = *flags & LDLM_FL_HAS_INTENT;
-        ENTRY;
-
-        if (intent) {
-                /* The request was created before ldlm_cli_enqueue call. */
-                if (rc == ELDLM_LOCK_ABORTED) {
-                        struct ldlm_reply *rep;
-                        rep = req_capsule_server_get(&req->rq_pill,
-                                                     &RMF_DLM_REP);
-
-                        LASSERT(rep != NULL);
-                       rep->lock_policy_res1 =
-                               ptlrpc_status_ntoh(rep->lock_policy_res1);
-                        if (rep->lock_policy_res1)
-                                rc = rep->lock_policy_res1;
-                }
-        }
+       bool intent = *flags & LDLM_FL_HAS_INTENT;
+       int rc;
+       ENTRY;
 
-        if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
-            (rc == 0)) {
-                *flags |= LDLM_FL_LVB_READY;
-                CDEBUG(D_INODE,"got kms "LPU64" blocks "LPU64" mtime "LPU64"\n",
-                       lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
-        }
+       /* The request was created before ldlm_cli_enqueue call. */
+       if (intent && errcode == ELDLM_LOCK_ABORTED) {
+               struct ldlm_reply *rep;
+
+               rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
+               LASSERT(rep != NULL);
+
+               rep->lock_policy_res1 =
+                       ptlrpc_status_ntoh(rep->lock_policy_res1);
+               if (rep->lock_policy_res1)
+                       errcode = rep->lock_policy_res1;
+               if (!agl)
+                       *flags |= LDLM_FL_LVB_READY;
+       } else if (errcode == ELDLM_OK) {
+               *flags |= LDLM_FL_LVB_READY;
+       }
 
         /* Call the update callback. */
-        rc = (*upcall)(cookie, rc);
+       rc = (*upcall)(cookie, lockh, errcode);
+
+       /* release the reference taken in ldlm_cli_enqueue() */
+       if (errcode == ELDLM_LOCK_MATCHED)
+               errcode = ELDLM_OK;
+       if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
+               ldlm_lock_decref(lockh, mode);
+
         RETURN(rc);
 }
 
@@ -2200,65 +2203,53 @@ static int osc_enqueue_interpret(const struct lu_env *env,
                                  struct ptlrpc_request *req,
                                  struct osc_enqueue_args *aa, int rc)
 {
-        struct ldlm_lock *lock;
-        struct lustre_handle handle;
-        __u32 mode;
-        struct ost_lvb *lvb;
-        __u32 lvb_len;
-       __u64 *flags = aa->oa_flags;
+       struct ldlm_lock *lock;
+       struct lustre_handle *lockh = &aa->oa_lockh;
+       ldlm_mode_t mode = aa->oa_mode;
+       struct ost_lvb *lvb = aa->oa_lvb;
+       __u32 lvb_len = sizeof(*lvb);
+       __u64 flags = 0;
 
-        /* Make a local copy of a lock handle and a mode, because aa->oa_*
-         * might be freed anytime after lock upcall has been called. */
-        lustre_handle_copy(&handle, aa->oa_lockh);
-        mode = aa->oa_ei->ei_mode;
+       ENTRY;
 
 
-        /* ldlm_cli_enqueue is holding a reference on the lock, so it must
-         * be valid. */
-        lock = ldlm_handle2lock(&handle);
+       /* ldlm_cli_enqueue is holding a reference on the lock, so it must
+        * be valid. */
+       lock = ldlm_handle2lock(lockh);
+       LASSERTF(lock != NULL,
+                "lockh "LPX64", req %p, aa %p - client evicted?\n",
+                lockh->cookie, req, aa);
 
-        /* Take an additional reference so that a blocking AST that
-         * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
-         * to arrive after an upcall has been executed by
-         * osc_enqueue_fini(). */
-        ldlm_lock_addref(&handle, mode);
+       /* Take an additional reference so that a blocking AST that
+        * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
+        * to arrive after an upcall has been executed by
+        * osc_enqueue_fini(). */
+       ldlm_lock_addref(lockh, mode);
 
        /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
        OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);
 
-        /* Let CP AST to grant the lock first. */
-        OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
+       /* Let CP AST to grant the lock first. */
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
 
-        if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
-                lvb = NULL;
-                lvb_len = 0;
-        } else {
-                lvb = aa->oa_lvb;
-                lvb_len = sizeof(*aa->oa_lvb);
-        }
+       if (aa->oa_agl) {
+               LASSERT(aa->oa_lvb == NULL);
+               LASSERT(aa->oa_flags == NULL);
+               aa->oa_flags = &flags;
+       }
 
-        /* Complete obtaining the lock procedure. */
-        rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
-                                   mode, flags, lvb, lvb_len, &handle, rc);
-        /* Complete osc stuff. */
-        rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
-                              flags, aa->oa_agl, rc);
+       /* Complete obtaining the lock procedure. */
+       rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
+                                  aa->oa_mode, aa->oa_flags, lvb, lvb_len,
+                                  lockh, rc);
+       /* Complete osc stuff. */
+       rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
+                             aa->oa_flags, aa->oa_agl, rc);
 
         OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
 
-        /* Release the lock for async request. */
-        if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
-                /*
-                 * Releases a reference taken by ldlm_cli_enqueue(), if it is
-                 * not already released by
-                 * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
-                 */
-                ldlm_lock_decref(&handle, mode);
-
-        LASSERTF(lock != NULL, "lockh %p, req %p, aa %p - client evicted?\n",
-                 aa->oa_lockh, req, aa);
-        ldlm_lock_decref(&handle, mode);
-        LDLM_LOCK_PUT(lock);
-        return rc;
+       ldlm_lock_decref(lockh, mode);
+       LDLM_LOCK_PUT(lock);
+       RETURN(rc);
 }
 
 struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
@@ -2268,20 +2259,20 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
  * other synchronous requests, however keeping some locks and trying to obtain
  * others may take a considerable amount of time in a case of ost failure; and
  * when other sync requests do not get released lock from a client, the client
- * is excluded from the cluster -- such scenarious make the life difficult, so
+ * is evicted from the cluster -- such scenarios make life difficult, so
  * release locks just after they are obtained. */
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                     __u64 *flags, ldlm_policy_data_t *policy,
                     struct ost_lvb *lvb, int kms_valid,
-                    obd_enqueue_update_f upcall, void *cookie,
+                    osc_enqueue_upcall_f upcall, void *cookie,
                     struct ldlm_enqueue_info *einfo,
-                    struct lustre_handle *lockh,
                     struct ptlrpc_request_set *rqset, int async, int agl)
 {
        struct obd_device *obd = exp->exp_obd;
+       struct lustre_handle lockh = { 0 };
        struct ptlrpc_request *req = NULL;
        int intent = *flags & LDLM_FL_HAS_INTENT;
-       __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+       __u64 match_lvb = agl ? 0 : LDLM_FL_LVB_READY;
        ldlm_mode_t mode;
        int rc;
        ENTRY;
@@ -2316,50 +2307,41 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
         if (einfo->ei_mode == LCK_PR)
                 mode |= LCK_PW;
         mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
-                               einfo->ei_type, policy, mode, lockh, 0);
-        if (mode) {
-                struct ldlm_lock *matched = ldlm_handle2lock(lockh);
-
-               if ((agl != 0) && !ldlm_is_lvb_ready(matched)) {
-                        /* For AGL, if enqueue RPC is sent but the lock is not
-                         * granted, then skip to process this strpe.
-                         * Return -ECANCELED to tell the caller. */
-                        ldlm_lock_decref(lockh, mode);
-                        LDLM_LOCK_PUT(matched);
-                        RETURN(-ECANCELED);
-                } else if (osc_set_lock_data_with_check(matched, einfo)) {
-                        *flags |= LDLM_FL_LVB_READY;
-                        /* addref the lock only if not async requests and PW
-                         * lock is matched whereas we asked for PR. */
-                        if (!rqset && einfo->ei_mode != mode)
-                                ldlm_lock_addref(lockh, LCK_PR);
-                        if (intent) {
-                                /* I would like to be able to ASSERT here that
-                                 * rss <= kms, but I can't, for reasons which
-                                 * are explained in lov_enqueue() */
-                        }
+                              einfo->ei_type, policy, mode, &lockh, 0);
+       if (mode) {
+               struct ldlm_lock *matched;
+
+               if (*flags & LDLM_FL_TEST_LOCK)
+                       RETURN(ELDLM_OK);
+
+               matched = ldlm_handle2lock(&lockh);
+               if (agl) {
+                       /* AGL enqueues DLM locks speculatively. Therefore if
+                        * a DLM lock already exists, it will just inform the
+                        * caller to cancel the AGL process for this stripe. */
+                       ldlm_lock_decref(&lockh, mode);
+                       LDLM_LOCK_PUT(matched);
+                       RETURN(-ECANCELED);
+               } else if (osc_set_lock_data_with_check(matched, einfo)) {
+                       *flags |= LDLM_FL_LVB_READY;
+
+                       /* We already have a lock, and it's referenced. */
+                       (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
+
+                       ldlm_lock_decref(&lockh, mode);
+                       LDLM_LOCK_PUT(matched);
+                       RETURN(ELDLM_OK);
+               } else {
+                       ldlm_lock_decref(&lockh, mode);
+                       LDLM_LOCK_PUT(matched);
+               }
+       }
 
-                       /* We already have a lock, and it's referenced.
-                        *
-                        * At this point, the cl_lock::cll_state is CLS_QUEUING,
-                        * AGL upcall may change it to CLS_HELD directly. */
-                        (*upcall)(cookie, ELDLM_OK);
-
-                        if (einfo->ei_mode != mode)
-                                ldlm_lock_decref(lockh, LCK_PW);
-                        else if (rqset)
-                                /* For async requests, decref the lock. */
-                                ldlm_lock_decref(lockh, einfo->ei_mode);
-                        LDLM_LOCK_PUT(matched);
-                        RETURN(ELDLM_OK);
-                } else {
-                        ldlm_lock_decref(lockh, mode);
-                        LDLM_LOCK_PUT(matched);
-                }
-        }
+no_match:
+       if (*flags & LDLM_FL_TEST_LOCK)
+               RETURN(-ENOLCK);
 
- no_match:
-        if (intent) {
+       if (intent) {
                req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                           &RQF_LDLM_ENQUEUE_LVB);
                if (req == NULL)
@@ -2380,20 +2362,29 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
         *flags &= ~LDLM_FL_BLOCK_GRANTED;
 
         rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
-                             sizeof(*lvb), LVB_T_OST, lockh, async);
-        if (rqset) {
-                if (!rc) {
-                        struct osc_enqueue_args *aa;
-                        CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
-                        aa = ptlrpc_req_async_args(req);
-                        aa->oa_ei = einfo;
-                        aa->oa_exp = exp;
-                        aa->oa_flags  = flags;
-                        aa->oa_upcall = upcall;
-                        aa->oa_cookie = cookie;
-                        aa->oa_lvb    = lvb;
-                        aa->oa_lockh  = lockh;
-                        aa->oa_agl    = !!agl;
+                             sizeof(*lvb), LVB_T_OST, &lockh, async);
+       if (async) {
+               if (!rc) {
+                       struct osc_enqueue_args *aa;
+                       CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+                       aa = ptlrpc_req_async_args(req);
+                       aa->oa_exp    = exp;
+                       aa->oa_mode   = einfo->ei_mode;
+                       aa->oa_type   = einfo->ei_type;
+                       lustre_handle_copy(&aa->oa_lockh, &lockh);
+                       aa->oa_upcall = upcall;
+                       aa->oa_cookie = cookie;
+                       aa->oa_agl    = !!agl;
+                       if (!agl) {
+                               aa->oa_flags  = flags;
+                               aa->oa_lvb    = lvb;
+                       } else {
+                               /* AGL essentially enqueues a DLM lock
+                                * in advance, so we don't care about the
+                                * result of the AGL enqueue. */
+                               aa->oa_lvb    = NULL;
+                               aa->oa_flags  = NULL;
+                       }
 
                         req->rq_interpret_reply =
                                 (ptlrpc_interpterer_t)osc_enqueue_interpret;
@@ -2407,11 +2398,12 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                 RETURN(rc);
         }
 
-        rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
-        if (intent)
-                ptlrpc_req_finished(req);
+       rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
+                             flags, agl, rc);
+       if (intent)
+               ptlrpc_req_finished(req);
 
-        RETURN(rc);
+       RETURN(rc);
 }
 
 int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
@@ -3271,7 +3263,6 @@ struct obd_ops osc_obd_ops = {
 };
 
 extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
 extern struct lock_class_key osc_ast_guard_class;
 
 int __init osc_init(void)
@@ -3301,9 +3292,6 @@ int __init osc_init(void)
                 RETURN(rc);
         }
 
-       spin_lock_init(&osc_ast_guard);
-       lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
-
        RETURN(rc);
 }
 
index 5229565..4246365 100644 (file)
@@ -757,14 +757,14 @@ test_32a() { # bug 11270
         log "checking cached lockless truncate"
         $TRUNCATE $DIR1/$tfile 8000000
         $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size"
         log "checking cached lockless truncate"
         $TRUNCATE $DIR1/$tfile 8000000
         $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size"
-        [ $(calc_osc_stats lockless_truncate) -eq 0 ] ||
-                error "lockless truncate doesn't use cached locks"
+       [ $(calc_osc_stats lockless_truncate) -ne 0 ] ||
+               error "cached truncate isn't lockless"
 
         log "checking not cached lockless truncate"
         $TRUNCATE $DIR2/$tfile 5000000
         $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size"
-        [ $(calc_osc_stats lockless_truncate) -ne 0 ] ||
-                error "not cached trancate isn't lockless"
+       [ $(calc_osc_stats lockless_truncate) -ne 0 ] ||
+               error "not cached truncate isn't lockless"
 
         log "disabled lockless truncate"
         enable_lockless_truncate 0