Whamcloud - gitweb
LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier()
[fs/lustre-release.git] / lustre / obdclass / cl_object.c
index eb23c9b..e3bc29f 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * Client Lustre Object.
  *
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
+#include <linux/list.h>
 #include <libcfs/libcfs.h>
-/* class_put_type() */
 #include <obd_class.h>
 #include <obd_support.h>
 #include <lustre_fid.h>
-#include <libcfs/list.h>
-#include <libcfs/libcfs_hash.h> /* for cfs_hash stuff */
 #include <cl_object.h>
 #include <lu_object.h>
 #include "cl_internal.h"
 
 static struct kmem_cache *cl_env_kmem;
+struct kmem_cache *cl_dio_aio_kmem;
+struct kmem_cache *cl_sub_dio_kmem;
+struct kmem_cache *cl_page_kmem_array[16];
+unsigned short cl_page_kmem_size_array[16];
 
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
@@ -91,7 +88,6 @@ void cl_object_header_fini(struct cl_object_header *h)
 {
         lu_object_header_fini(&h->coh_lu);
 }
-EXPORT_SYMBOL(cl_object_header_fini);
 
 /**
  * Returns a cl_object with a given \a fid.
@@ -202,28 +198,26 @@ EXPORT_SYMBOL(cl_object_attr_unlock);
  * top-to-bottom to fill in parts of \a attr that this layer is responsible
  * for.
  */
-int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
-                       struct cl_attr *attr)
+int cl_object_attr_get(const struct lu_env *env, struct cl_object *top,
+                       struct cl_attr *attr)
 {
-       struct lu_object_header *top;
-       int result;
+       struct cl_object *obj;
+       int result = 0;
 
-       assert_spin_locked(cl_object_attr_guard(obj));
+       assert_spin_locked(cl_object_attr_guard(top));
        ENTRY;
 
-        top = obj->co_lu.lo_header;
-        result = 0;
-       list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
-                if (obj->co_ops->coo_attr_get != NULL) {
-                        result = obj->co_ops->coo_attr_get(env, obj, attr);
-                        if (result != 0) {
-                                if (result > 0)
-                                        result = 0;
-                                break;
-                        }
-                }
-        }
-        RETURN(result);
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_attr_get != NULL) {
+                       result = obj->co_ops->coo_attr_get(env, obj, attr);
+                       if (result != 0) {
+                               if (result > 0)
+                                       result = 0;
+                               break;
+                       }
+               }
+       }
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_object_attr_get);
 
@@ -234,18 +228,16 @@ EXPORT_SYMBOL(cl_object_attr_get);
  * updated. Calls cl_object_operations::coo_upd_attr() on every layer, bottom
  * to top.
  */
-int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
+int cl_object_attr_update(const struct lu_env *env, struct cl_object *top,
                          const struct cl_attr *attr, unsigned v)
 {
-       struct lu_object_header *top;
-       int result;
+       struct cl_object *obj;
+       int result = 0;
 
-       assert_spin_locked(cl_object_attr_guard(obj));
+       assert_spin_locked(cl_object_attr_guard(top));
        ENTRY;
 
-       top = obj->co_lu.lo_header;
-       result = 0;
-       list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
+       cl_object_for_each_reverse(obj, top) {
                if (obj->co_ops->coo_attr_update != NULL) {
                        result = obj->co_ops->coo_attr_update(env, obj, attr,
                                                              v);
@@ -261,6 +253,25 @@ int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
 EXPORT_SYMBOL(cl_object_attr_update);
 
 /**
+ * Mark the inode as dirty when the inode has uncommitted (unstable) pages.
+ * Thus when the system is under memory pressure, it will trigger writeback
+ * in the background to commit and unpin the pages.
+ */
+void cl_object_dirty_for_sync(const struct lu_env *env, struct cl_object *top)
+{
+       struct cl_object *layer;
+
+       ENTRY;
+
+       /*
+        * Walk the layers in cl_object_for_each() order; a layer that does
+        * not implement coo_dirty_for_sync is simply skipped.
+        */
+       cl_object_for_each(layer, top) {
+               if (layer->co_ops->coo_dirty_for_sync)
+                       layer->co_ops->coo_dirty_for_sync(env, layer);
+       }
+
+       EXIT;
+}
+EXPORT_SYMBOL(cl_object_dirty_for_sync);
+
+
+/**
  * Notifies layers (bottom-to-top) that glimpse AST was received.
  *
  * Layers have to fill \a lvb fields with information that will be shipped
@@ -268,70 +279,63 @@ EXPORT_SYMBOL(cl_object_attr_update);
  *
  * \see cl_lock_operations::clo_glimpse()
  */
-int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
-                      struct ost_lvb *lvb)
+int cl_object_glimpse(const struct lu_env *env, struct cl_object *top,
+                     struct ost_lvb *lvb)
 {
-        struct lu_object_header *top;
-        int result;
+       struct cl_object *obj;
+       int result = 0;
 
-        ENTRY;
-        top = obj->co_lu.lo_header;
-        result = 0;
-       list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
-                if (obj->co_ops->coo_glimpse != NULL) {
-                        result = obj->co_ops->coo_glimpse(env, obj, lvb);
-                        if (result != 0)
-                                break;
-                }
-        }
-        LU_OBJECT_HEADER(D_DLMTRACE, env, lu_object_top(top),
-                         "size: "LPU64" mtime: "LPU64" atime: "LPU64" "
-                         "ctime: "LPU64" blocks: "LPU64"\n",
-                         lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
-                         lvb->lvb_ctime, lvb->lvb_blocks);
-        RETURN(result);
+       ENTRY;
+       cl_object_for_each_reverse(obj, top) {
+               if (obj->co_ops->coo_glimpse != NULL) {
+                       result = obj->co_ops->coo_glimpse(env, obj, lvb);
+                       if (result != 0)
+                               break;
+               }
+       }
+       LU_OBJECT_HEADER(D_DLMTRACE, env, lu_object_top(top->co_lu.lo_header),
+                        "size: %llu mtime: %llu atime: %llu "
+                        "ctime: %llu blocks: %llu\n",
+                        lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
+                        lvb->lvb_ctime, lvb->lvb_blocks);
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_object_glimpse);
 
 /**
  * Updates a configuration of an object \a obj.
  */
-int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
-                const struct cl_object_conf *conf)
+int cl_conf_set(const struct lu_env *env, struct cl_object *top,
+               const struct cl_object_conf *conf)
 {
-        struct lu_object_header *top;
-        int result;
+       struct cl_object *obj;
+       int result = 0;
 
-        ENTRY;
-        top = obj->co_lu.lo_header;
-        result = 0;
-       list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
-                if (obj->co_ops->coo_conf_set != NULL) {
-                        result = obj->co_ops->coo_conf_set(env, obj, conf);
-                        if (result != 0)
-                                break;
-                }
-        }
-        RETURN(result);
+       ENTRY;
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_conf_set != NULL) {
+                       result = obj->co_ops->coo_conf_set(env, obj, conf);
+                       if (result)
+                               break;
+               }
+       }
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_conf_set);
 
 /**
  * Prunes caches of pages and locks for this object.
  */
-int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+int cl_object_prune(const struct lu_env *env, struct cl_object *top)
 {
-       struct lu_object_header *top;
-       struct cl_object *o;
-       int result;
+       struct cl_object *obj;
+       int result = 0;
        ENTRY;
 
-       top = obj->co_lu.lo_header;
-       result = 0;
-       list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
-               if (o->co_ops->coo_prune != NULL) {
-                       result = o->co_ops->coo_prune(env, o);
-                       if (result != 0)
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_prune != NULL) {
+                       result = obj->co_ops->coo_prune(env, obj);
+                       if (result)
                                break;
                }
        }
@@ -343,18 +347,18 @@ EXPORT_SYMBOL(cl_object_prune);
 /**
  * Get stripe information of this object.
  */
-int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
-                       struct lov_user_md __user *uarg)
+int cl_object_getstripe(const struct lu_env *env, struct cl_object *top,
+                       struct lov_user_md __user *uarg, size_t size)
 {
-       struct lu_object_header *top;
-       int                     result = 0;
+       struct cl_object *obj;
+       int result = 0;
        ENTRY;
 
-       top = obj->co_lu.lo_header;
-       list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
-               if (obj->co_ops->coo_getstripe != NULL) {
-                       result = obj->co_ops->coo_getstripe(env, obj, uarg);
-                       if (result != 0)
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_getstripe) {
+                       result = obj->co_ops->coo_getstripe(env, obj, uarg,
+                                                           size);
+                       if (result)
                                break;
                }
        }
@@ -363,28 +367,104 @@ int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
 EXPORT_SYMBOL(cl_object_getstripe);
 
 /**
- * Find whether there is any callback data (ldlm lock) attached upon this
- * object.
+ * Get fiemap extents from file object.
+ *
+ * \param env [in]     lustre environment
+ * \param top [in]     file object
+ * \param key [in]     fiemap request argument
+ * \param fiemap [out] fiemap extents mapping retrieved
+ * \param buflen [in]  max buffer length of @fiemap
+ *
+ * \retval 0   success
+ * \retval < 0 error
  */
-int cl_object_find_cbdata(const struct lu_env *env, struct cl_object *obj,
-                          ldlm_iterator_t iter, void *data)
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *top,
+                    struct ll_fiemap_info_key *key,
+                    struct fiemap *fiemap, size_t *buflen)
 {
-       struct lu_object_header *top;
-       int                     result = 0;
+       struct cl_object *obj;
+       int result = 0;
        ENTRY;
 
-       top = obj->co_lu.lo_header;
-       list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
-               if (obj->co_ops->coo_find_cbdata != NULL) {
-                       result = obj->co_ops->coo_find_cbdata(env, obj, iter,
-                                                             data);
-                       if (result != 0)
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_fiemap) {
+                       result = obj->co_ops->coo_fiemap(env, obj, key, fiemap,
+                                                        buflen);
+                       if (result)
                                break;
                }
        }
        RETURN(result);
 }
-EXPORT_SYMBOL(cl_object_find_cbdata);
+EXPORT_SYMBOL(cl_object_fiemap);
+
+/**
+ * Get the layout of an object from the first layer that implements
+ * coo_layout_get.
+ *
+ * \param env [in]     lustre environment
+ * \param top [in]     object whose layers are searched
+ * \param cl           layout descriptor handed to the layer's coo_layout_get
+ *
+ * \retval -EOPNOTSUPP no layer implements coo_layout_get
+ * \retval other       value returned by the layer's coo_layout_get
+ */
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *top,
+                        struct cl_layout *cl)
+{
+       struct cl_object *obj;
+       int rc = -EOPNOTSUPP;
+
+       ENTRY;
+
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_layout_get) {
+                       /* Only the first layer providing the hook is asked. */
+                       rc = obj->co_ops->coo_layout_get(env, obj, cl);
+                       break;
+               }
+       }
+
+       /*
+        * Leave through RETURN() on every path so that it pairs with ENTRY,
+        * as the sibling cl_object_*() helpers do.
+        */
+       RETURN(rc);
+}
+EXPORT_SYMBOL(cl_object_layout_get);
+
+/**
+ * Return the smallest coo_maxbytes value reported by the layers of \a top.
+ *
+ * Layers without a coo_maxbytes method are ignored; LLONG_MAX is returned
+ * when no layer reports a smaller value.
+ */
+loff_t cl_object_maxbytes(struct cl_object *top)
+{
+       struct cl_object *layer;
+       loff_t limit = LLONG_MAX;
+
+       ENTRY;
+
+       cl_object_for_each(layer, top) {
+               if (layer->co_ops->coo_maxbytes) {
+                       loff_t layer_max = layer->co_ops->coo_maxbytes(layer);
+
+                       /* Keep the tightest limit seen so far. */
+                       if (layer_max < limit)
+                               limit = layer_max;
+               }
+       }
+
+       RETURN(limit);
+}
+EXPORT_SYMBOL(cl_object_maxbytes);
+
+/**
+ * Call coo_object_flush on each layer of \a top that implements it,
+ * stopping at the first layer that returns a non-zero value.
+ *
+ * \retval 0     every implementing layer returned 0 (or none implements it)
+ * \retval other first non-zero value returned by a layer
+ */
+int cl_object_flush(const struct lu_env *env, struct cl_object *top,
+                        struct ldlm_lock *lock)
+{
+       struct cl_object *layer;
+       int result = 0;
+
+       ENTRY;
+
+       cl_object_for_each(layer, top) {
+               if (layer->co_ops->coo_object_flush != NULL)
+                       result = layer->co_ops->coo_object_flush(env, layer,
+                                                                lock);
+               /* result is still 0 here unless this layer just failed. */
+               if (result != 0)
+                       break;
+       }
+
+       RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_flush);
+
+/**
+ * Pass the inode operation \a opc with its argument \a data to each layer
+ * of \a top that implements coo_inode_ops, stopping at the first layer that
+ * returns a non-zero value.
+ *
+ * \retval 0     every implementing layer returned 0 (or none implements it)
+ * \retval other first non-zero value returned by a layer
+ */
+int cl_object_inode_ops(const struct lu_env *env, struct cl_object *top,
+                       enum coo_inode_opc opc, void *data)
+{
+       struct cl_object *layer;
+       int result = 0;
+
+       ENTRY;
+
+       cl_object_for_each(layer, top) {
+               if (layer->co_ops->coo_inode_ops != NULL)
+                       result = layer->co_ops->coo_inode_ops(env, layer, opc,
+                                                             data);
+               /* result is still 0 here unless this layer just failed. */
+               if (result != 0)
+                       break;
+       }
+
+       RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_inode_ops);
 
 /**
  * Helper function removing all object locks, and marking object for
@@ -478,7 +558,7 @@ static struct cache_stats cl_env_stats = {
  */
 int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
 {
-       static const char *pstate[] = {
+       static const char *const pstate[] = {
                [CPS_CACHED]    = "c",
                [CPS_OWNED]     = "o",
                [CPS_PAGEOUT]   = "w",
@@ -512,57 +592,19 @@ EXPORT_SYMBOL(cl_site_stats_print);
  *
  */
 
-/**
- * The most efficient way is to store cl_env pointer in task specific
- * structures. On Linux, it wont' be easy to use task_struct->journal_info
- * because Lustre code may call into other fs which has certain assumptions
- * about journal_info. Currently following fields in task_struct are identified
- * can be used for this purpose:
- *  - cl_env: for liblustre.
- *  - tux_info: ony on RedHat kernel.
- *  - ...
- * \note As long as we use task_struct to store cl_env, we assume that once
- * called into Lustre, we'll never call into the other part of the kernel
- * which will use those fields in task_struct without explicitly exiting
- * Lustre.
- *
- * If there's no space in task_struct is available, hash will be used.
- * bz20044, bz22683.
- */
-
-static struct list_head cl_envs;
-static unsigned cl_envs_cached_nr  = 0;
-static unsigned cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
-                                           * for now. */
-static DEFINE_SPINLOCK(cl_envs_guard);
+static unsigned cl_envs_cached_max = 32; /* XXX: prototype: arbitrary limit
+                                         * for now. */
+static struct cl_env_cache {
+       rwlock_t                cec_guard;
+       unsigned                cec_count;
+       struct list_head        cec_envs;
+} *cl_envs = NULL;
 
 struct cl_env {
         void             *ce_magic;
         struct lu_env     ce_lu;
         struct lu_context ce_ses;
 
-#ifdef LL_TASK_CL_ENV
-        void             *ce_prev;
-#else
-        /**
-         * This allows cl_env to be entered into cl_env_hash which implements
-         * the current thread -> client environment lookup.
-         */
-       struct hlist_node  ce_node;
-#endif
-       /**
-        * Owner for the current cl_env.
-        *
-        * If LL_TASK_CL_ENV is defined, this point to the owning current,
-        * only for debugging purpose ;
-        * Otherwise hash is used, and this is the key for cfs_hash.
-        * Now current thread pid is stored. Note using thread pointer would
-        * lead to unbalanced hash because of its specific allocation locality
-        * and could be varied for different platforms and OSes, even different
-        * OS versions.
-        */
-       void             *ce_owner;
-
         /*
          * Linkage into global list of all client environments. Used for
          * garbage collection.
@@ -579,173 +621,30 @@ struct cl_env {
         void             *ce_debug;
 };
 
+static void cl_env_inc(enum cache_stats_item item)
+{
 #ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
-#define CL_ENV_INC(counter) atomic_inc(&cl_env_stats.cs_stats[CS_##counter])
-
-#define CL_ENV_DEC(counter) do {                                              \
-       LASSERT(atomic_read(&cl_env_stats.cs_stats[CS_##counter]) > 0);   \
-       atomic_dec(&cl_env_stats.cs_stats[CS_##counter]);                 \
-} while (0)
-#else
-#define CL_ENV_INC(counter)
-#define CL_ENV_DEC(counter)
+       atomic_inc(&cl_env_stats.cs_stats[item]);
 #endif
-
-static void cl_env_init0(struct cl_env *cle, void *debug)
-{
-        LASSERT(cle->ce_ref == 0);
-        LASSERT(cle->ce_magic == &cl_env_init0);
-        LASSERT(cle->ce_debug == NULL && cle->ce_owner == NULL);
-
-        cle->ce_ref = 1;
-        cle->ce_debug = debug;
-        CL_ENV_INC(busy);
 }
 
-
-#ifndef LL_TASK_CL_ENV
-/*
- * The implementation of using hash table to connect cl_env and thread
- */
-
-static cfs_hash_t *cl_env_hash;
-
-static unsigned cl_env_hops_hash(cfs_hash_t *lh,
-                                 const void *key, unsigned mask)
+static void cl_env_dec(enum cache_stats_item item)
 {
-#if BITS_PER_LONG == 64
-        return cfs_hash_u64_hash((__u64)key, mask);
-#else
-        return cfs_hash_u32_hash((__u32)key, mask);
+#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
+       LASSERT(atomic_read(&cl_env_stats.cs_stats[item]) > 0);
+       atomic_dec(&cl_env_stats.cs_stats[item]);
 #endif
 }
 
-static void *cl_env_hops_obj(struct hlist_node *hn)
+static void cl_env_init0(struct cl_env *cle, void *debug)
 {
-       struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
-
+       LASSERT(cle->ce_ref == 0);
        LASSERT(cle->ce_magic == &cl_env_init0);
-       return (void *)cle;
-}
-
-static int cl_env_hops_keycmp(const void *key, struct hlist_node *hn)
-{
-        struct cl_env *cle = cl_env_hops_obj(hn);
-
-        LASSERT(cle->ce_owner != NULL);
-        return (key == cle->ce_owner);
-}
-
-static void cl_env_hops_noop(cfs_hash_t *hs, struct hlist_node *hn)
-{
-       struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
-        LASSERT(cle->ce_magic == &cl_env_init0);
-}
-
-static cfs_hash_ops_t cl_env_hops = {
-        .hs_hash        = cl_env_hops_hash,
-        .hs_key         = cl_env_hops_obj,
-        .hs_keycmp      = cl_env_hops_keycmp,
-        .hs_object      = cl_env_hops_obj,
-        .hs_get         = cl_env_hops_noop,
-        .hs_put_locked  = cl_env_hops_noop,
-};
-
-static inline struct cl_env *cl_env_fetch(void)
-{
-       struct cl_env *cle;
-
-       cle = cfs_hash_lookup(cl_env_hash, (void *) (long) current->pid);
-       LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0));
-       return cle;
-}
-
-static inline void cl_env_attach(struct cl_env *cle)
-{
-       if (cle) {
-               int rc;
-
-               LASSERT(cle->ce_owner == NULL);
-               cle->ce_owner = (void *) (long) current->pid;
-               rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner,
-                                        &cle->ce_node);
-               LASSERT(rc == 0);
-       }
-}
-
-static inline void cl_env_do_detach(struct cl_env *cle)
-{
-       void *cookie;
-
-       LASSERT(cle->ce_owner == (void *) (long) current->pid);
-       cookie = cfs_hash_del(cl_env_hash, cle->ce_owner,
-                             &cle->ce_node);
-       LASSERT(cookie == cle);
-       cle->ce_owner = NULL;
-}
-
-static int cl_env_store_init(void) {
-        cl_env_hash = cfs_hash_create("cl_env",
-                                      HASH_CL_ENV_BITS, HASH_CL_ENV_BITS,
-                                      HASH_CL_ENV_BKT_BITS, 0,
-                                      CFS_HASH_MIN_THETA,
-                                      CFS_HASH_MAX_THETA,
-                                      &cl_env_hops,
-                                      CFS_HASH_RW_BKTLOCK);
-        return cl_env_hash != NULL ? 0 :-ENOMEM;
-}
-
-static void cl_env_store_fini(void) {
-        cfs_hash_putref(cl_env_hash);
-}
-
-#else /* LL_TASK_CL_ENV */
-/*
- * The implementation of store cl_env directly in thread structure.
- */
-
-static inline struct cl_env *cl_env_fetch(void)
-{
-       struct cl_env *cle;
+       LASSERT(cle->ce_debug == NULL);
 
-       cle = current->LL_TASK_CL_ENV;
-       if (cle && cle->ce_magic != &cl_env_init0)
-               cle = NULL;
-       return cle;
-}
-
-static inline void cl_env_attach(struct cl_env *cle)
-{
-       if (cle) {
-               LASSERT(cle->ce_owner == NULL);
-               cle->ce_owner = current;
-               cle->ce_prev = current->LL_TASK_CL_ENV;
-               current->LL_TASK_CL_ENV = cle;
-       }
-}
-
-static inline void cl_env_do_detach(struct cl_env *cle)
-{
-       LASSERT(cle->ce_owner == current);
-       LASSERT(current->LL_TASK_CL_ENV == cle);
-       current->LL_TASK_CL_ENV = cle->ce_prev;
-       cle->ce_owner = NULL;
-}
-
-static int cl_env_store_init(void) { return 0; }
-static void cl_env_store_fini(void) { }
-
-#endif /* LL_TASK_CL_ENV */
-
-static inline struct cl_env *cl_env_detach(struct cl_env *cle)
-{
-        if (cle == NULL)
-                cle = cl_env_fetch();
-
-        if (cle && cle->ce_owner)
-                cl_env_do_detach(cle);
-
-        return cle;
+       cle->ce_ref = 1;
+       cle->ce_debug = debug;
+       cl_env_inc(CS_busy);
 }
 
 static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
@@ -775,8 +674,8 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
                        OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
                        env = ERR_PTR(rc);
                } else {
-                       CL_ENV_INC(create);
-                       CL_ENV_INC(total);
+                       cl_env_inc(CS_create);
+                       cl_env_inc(CS_total);
                }
        } else
                env = ERR_PTR(-ENOMEM);
@@ -785,27 +684,35 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
 
 static void cl_env_fini(struct cl_env *cle)
 {
-        CL_ENV_DEC(total);
-        lu_context_fini(&cle->ce_lu.le_ctx);
-        lu_context_fini(&cle->ce_ses);
-        OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
+       cl_env_dec(CS_total);
+       lu_context_fini(&cle->ce_lu.le_ctx);
+       lu_context_fini(&cle->ce_ses);
+       OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
 }
 
+/* Get a cl_env, either from the per-CPU cache for the current CPU, or by
+ * allocating a new one.
+ */
 static struct lu_env *cl_env_obtain(void *debug)
 {
        struct cl_env *cle;
        struct lu_env *env;
+       int cpu = get_cpu();
 
        ENTRY;
-       spin_lock(&cl_envs_guard);
-       LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
-       if (cl_envs_cached_nr > 0) {
+
+       read_lock(&cl_envs[cpu].cec_guard);
+       LASSERT(equi(cl_envs[cpu].cec_count == 0,
+               list_empty(&cl_envs[cpu].cec_envs)));
+       if (cl_envs[cpu].cec_count > 0) {
                int rc;
 
-               cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+               cle = container_of(cl_envs[cpu].cec_envs.next, struct cl_env,
+                                  ce_linkage);
                list_del_init(&cle->ce_linkage);
-               cl_envs_cached_nr--;
-               spin_unlock(&cl_envs_guard);
+               cl_envs[cpu].cec_count--;
+               read_unlock(&cl_envs[cpu].cec_guard);
+               put_cpu();
 
                 env = &cle->ce_lu;
                 rc = lu_env_refill(env);
@@ -818,7 +725,8 @@ static struct lu_env *cl_env_obtain(void *debug)
                         env = ERR_PTR(rc);
                 }
         } else {
-               spin_unlock(&cl_envs_guard);
+               read_unlock(&cl_envs[cpu].cec_guard);
+               put_cpu();
                env = cl_env_new(lu_context_tags_default,
                                 lu_session_tags_default, debug);
        }
@@ -830,33 +738,15 @@ static inline struct cl_env *cl_env_container(struct lu_env *env)
         return container_of(env, struct cl_env, ce_lu);
 }
 
-struct lu_env *cl_env_peek(int *refcheck)
-{
-        struct lu_env *env;
-        struct cl_env *cle;
-
-        CL_ENV_INC(lookup);
-
-        /* check that we don't go far from untrusted pointer */
-        CLASSERT(offsetof(struct cl_env, ce_magic) == 0);
-
-        env = NULL;
-        cle = cl_env_fetch();
-        if (cle != NULL) {
-                CL_ENV_INC(hit);
-                env = &cle->ce_lu;
-                *refcheck = ++cle->ce_ref;
-        }
-        CDEBUG(D_OTHER, "%d@%p\n", cle ? cle->ce_ref : 0, cle);
-        return env;
-}
-EXPORT_SYMBOL(cl_env_peek);
-
 /**
- * Returns lu_env: if there already is an environment associated with the
- * current thread, it is returned, otherwise, new environment is allocated.
+ * Returns an lu_env.
  *
- * Allocations are amortized through the global cache of environments.
+ * No link to thread, this returns an env from the cache or
+ * allocates a new one.
+ *
+ * If you need to get the specific environment you created for this thread,
+ * you must either pass the pointer directly or store it in the file/inode
+ * private data and retrieve it from there using ll_cl_add/ll_cl_find.
  *
  * \param refcheck pointer to a counter used to detect environment leaks. In
  * the usual case cl_env_get() and cl_env_put() are called in the same lexical
@@ -865,21 +755,17 @@ EXPORT_SYMBOL(cl_env_peek);
  *
  * \see cl_env_put()
  */
-struct lu_env *cl_env_get(int *refcheck)
+struct lu_env *cl_env_get(__u16 *refcheck)
 {
         struct lu_env *env;
 
-        env = cl_env_peek(refcheck);
-        if (env == NULL) {
-                env = cl_env_obtain(__builtin_return_address(0));
-                if (!IS_ERR(env)) {
-                        struct cl_env *cle;
+       env = cl_env_obtain(__builtin_return_address(0));
+       if (!IS_ERR(env)) {
+               struct cl_env *cle;
 
-                        cle = cl_env_container(env);
-                        cl_env_attach(cle);
-                        *refcheck = cle->ce_ref;
-                        CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
-                }
+               cle = cl_env_container(env);
+               *refcheck = cle->ce_ref;
+               CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
         }
         return env;
 }
@@ -890,11 +776,10 @@ EXPORT_SYMBOL(cl_env_get);
  *
  * \see cl_env_get()
  */
-struct lu_env *cl_env_alloc(int *refcheck, __u32 tags)
+struct lu_env *cl_env_alloc(__u16 *refcheck, __u32 tags)
 {
         struct lu_env *env;
 
-        LASSERT(cl_env_peek(refcheck) == NULL);
         env = cl_env_new(tags, tags, __builtin_return_address(0));
         if (!IS_ERR(env)) {
                 struct cl_env *cle;
@@ -909,7 +794,6 @@ EXPORT_SYMBOL(cl_env_alloc);
 
 static void cl_env_exit(struct cl_env *cle)
 {
-        LASSERT(cle->ce_owner == NULL);
         lu_context_exit(&cle->ce_lu.le_ctx);
         lu_context_exit(&cle->ce_ses);
 }
@@ -922,21 +806,26 @@ static void cl_env_exit(struct cl_env *cle)
 unsigned cl_env_cache_purge(unsigned nr)
 {
        struct cl_env *cle;
+       unsigned i;
 
        ENTRY;
-       spin_lock(&cl_envs_guard);
-       for (; !list_empty(&cl_envs) && nr > 0; --nr) {
-               cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
-               list_del_init(&cle->ce_linkage);
-               LASSERT(cl_envs_cached_nr > 0);
-               cl_envs_cached_nr--;
-               spin_unlock(&cl_envs_guard);
+       for_each_possible_cpu(i) {
+               write_lock(&cl_envs[i].cec_guard);
+               for (; !list_empty(&cl_envs[i].cec_envs) && nr > 0; --nr) {
+                       cle = container_of(cl_envs[i].cec_envs.next,
+                                          struct cl_env, ce_linkage);
+                       list_del_init(&cle->ce_linkage);
+                       LASSERT(cl_envs[i].cec_count > 0);
+                       cl_envs[i].cec_count--;
+                       write_unlock(&cl_envs[i].cec_guard);
 
-               cl_env_fini(cle);
-               spin_lock(&cl_envs_guard);
+                       cl_env_fini(cle);
+                       write_lock(&cl_envs[i].cec_guard);
+               }
+               LASSERT(equi(cl_envs[i].cec_count == 0,
+                       list_empty(&cl_envs[i].cec_envs)));
+               write_unlock(&cl_envs[i].cec_guard);
        }
-       LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
-       spin_unlock(&cl_envs_guard);
        RETURN(nr);
 }
 EXPORT_SYMBOL(cl_env_cache_purge);
@@ -945,10 +834,10 @@ EXPORT_SYMBOL(cl_env_cache_purge);
  * Release an environment.
  *
  * Decrement \a env reference counter. When counter drops to 0, nothing in
- * this thread is using environment and it is returned to the allocation
- * cache, or freed straight away, if cache is large enough.
+ * this thread is using environment and it is returned to the per-CPU cache or
+ * freed immediately if the cache is full.
  */
-void cl_env_put(struct lu_env *env, int *refcheck)
+void cl_env_put(struct lu_env *env, __u16 *refcheck)
 {
         struct cl_env *cle;
 
@@ -959,133 +848,44 @@ void cl_env_put(struct lu_env *env, int *refcheck)
 
         CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
         if (--cle->ce_ref == 0) {
-                CL_ENV_DEC(busy);
-                cl_env_detach(cle);
-                cle->ce_debug = NULL;
-                cl_env_exit(cle);
-                /*
-                 * Don't bother to take a lock here.
-                 *
-                 * Return environment to the cache only when it was allocated
-                 * with the standard tags.
-                 */
-                if (cl_envs_cached_nr < cl_envs_cached_max &&
-                    (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
-                    (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
-                       spin_lock(&cl_envs_guard);
-                       list_add(&cle->ce_linkage, &cl_envs);
-                       cl_envs_cached_nr++;
-                       spin_unlock(&cl_envs_guard);
+               int cpu = get_cpu();
+
+               cl_env_dec(CS_busy);
+               cle->ce_debug = NULL;
+               cl_env_exit(cle);
+               /*
+                * Don't bother to take a lock here.
+                *
+                * Return environment to the cache only when it was allocated
+                * with the standard tags.
+                */
+               if (cl_envs[cpu].cec_count < cl_envs_cached_max &&
+                   (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == lu_context_tags_default &&
+                   (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == lu_session_tags_default) {
+                       read_lock(&cl_envs[cpu].cec_guard);
+                       list_add(&cle->ce_linkage, &cl_envs[cpu].cec_envs);
+                       cl_envs[cpu].cec_count++;
+                       read_unlock(&cl_envs[cpu].cec_guard);
                } else
                        cl_env_fini(cle);
+               put_cpu();
        }
 }
 EXPORT_SYMBOL(cl_env_put);
 
 /**
- * Declares a point of re-entrancy.
- *
- * \see cl_env_reexit()
- */
-void *cl_env_reenter(void)
-{
-        return cl_env_detach(NULL);
-}
-EXPORT_SYMBOL(cl_env_reenter);
-
-/**
- * Exits re-entrancy.
- */
-void cl_env_reexit(void *cookie)
-{
-        cl_env_detach(NULL);
-        cl_env_attach(cookie);
-}
-EXPORT_SYMBOL(cl_env_reexit);
-
-/**
- * Setup user-supplied \a env as a current environment. This is to be used to
- * guaranteed that environment exists even when cl_env_get() fails. It is up
- * to user to ensure proper concurrency control.
- *
- * \see cl_env_unplant()
- */
-void cl_env_implant(struct lu_env *env, int *refcheck)
-{
-        struct cl_env *cle = cl_env_container(env);
-
-        LASSERT(cle->ce_ref > 0);
-
-        cl_env_attach(cle);
-        cl_env_get(refcheck);
-        CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
-}
-EXPORT_SYMBOL(cl_env_implant);
-
-/**
- * Detach environment installed earlier by cl_env_implant().
- */
-void cl_env_unplant(struct lu_env *env, int *refcheck)
-{
-        struct cl_env *cle = cl_env_container(env);
-
-        LASSERT(cle->ce_ref > 1);
-
-        CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
-
-        cl_env_detach(cle);
-        cl_env_put(env, refcheck);
-}
-EXPORT_SYMBOL(cl_env_unplant);
-
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest)
-{
-        struct lu_env *env;
-
-        nest->cen_cookie = NULL;
-        env = cl_env_peek(&nest->cen_refcheck);
-        if (env != NULL) {
-                if (!cl_io_is_going(env))
-                        return env;
-                else {
-                        cl_env_put(env, &nest->cen_refcheck);
-                        nest->cen_cookie = cl_env_reenter();
-                }
-        }
-        env = cl_env_get(&nest->cen_refcheck);
-        if (IS_ERR(env)) {
-                cl_env_reexit(nest->cen_cookie);
-                return env;
-        }
-
-        LASSERT(!cl_io_is_going(env));
-        return env;
-}
-EXPORT_SYMBOL(cl_env_nested_get);
-
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env)
-{
-        cl_env_put(env, &nest->cen_refcheck);
-        cl_env_reexit(nest->cen_cookie);
-}
-EXPORT_SYMBOL(cl_env_nested_put);
-
-/**
  * Converts struct cl_attr to struct ost_lvb.
  *
  * \see cl_lvb2attr
  */
 void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr)
 {
-        ENTRY;
         lvb->lvb_size   = attr->cat_size;
         lvb->lvb_mtime  = attr->cat_mtime;
         lvb->lvb_atime  = attr->cat_atime;
         lvb->lvb_ctime  = attr->cat_ctime;
         lvb->lvb_blocks = attr->cat_blocks;
-        EXIT;
 }
-EXPORT_SYMBOL(cl_attr2lvb);
 
 /**
  * Converts struct ost_lvb to struct cl_attr.
@@ -1094,17 +894,16 @@ EXPORT_SYMBOL(cl_attr2lvb);
  */
 void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 {
-        ENTRY;
         attr->cat_size   = lvb->lvb_size;
         attr->cat_mtime  = lvb->lvb_mtime;
         attr->cat_atime  = lvb->lvb_atime;
         attr->cat_ctime  = lvb->lvb_ctime;
         attr->cat_blocks = lvb->lvb_blocks;
-        EXIT;
 }
 EXPORT_SYMBOL(cl_lvb2attr);
 
 static struct cl_env cl_env_percpu[NR_CPUS];
+static DEFINE_MUTEX(cl_env_percpu_mutex);
 
 static int cl_env_percpu_init(void)
 {
@@ -1116,6 +915,10 @@ static int cl_env_percpu_init(void)
        for_each_possible_cpu(i) {
                struct lu_env *env;
 
+               rwlock_init(&cl_envs[i].cec_guard);
+               INIT_LIST_HEAD(&cl_envs[i].cec_envs);
+               cl_envs[i].cec_count = 0;
+
                cle = &cl_env_percpu[i];
                env = &cle->ce_lu;
 
@@ -1138,7 +941,7 @@ static int cl_env_percpu_init(void)
                /* Indices 0 to i (excluding i) were correctly initialized,
                 * thus we must uninitialize up to i, the rest are undefined. */
                for (j = 0; j < i; j++) {
-                       cle = &cl_env_percpu[i];
+                       cle = &cl_env_percpu[j];
                        lu_context_exit(&cle->ce_ses);
                        lu_context_fini(&cle->ce_ses);
                        lu_env_fini(&cle->ce_lu);
@@ -1165,8 +968,10 @@ static void cl_env_percpu_refill(void)
 {
        int i;
 
+       mutex_lock(&cl_env_percpu_mutex);
        for_each_possible_cpu(i)
                lu_env_refill(&cl_env_percpu[i].ce_lu);
+       mutex_unlock(&cl_env_percpu_mutex);
 }
 
 void cl_env_percpu_put(struct lu_env *env)
@@ -1181,22 +986,20 @@ void cl_env_percpu_put(struct lu_env *env)
        cle->ce_ref--;
        LASSERT(cle->ce_ref == 0);
 
-       CL_ENV_DEC(busy);
-       cl_env_detach(cle);
+       cl_env_dec(CS_busy);
        cle->ce_debug = NULL;
 
        put_cpu();
 }
 EXPORT_SYMBOL(cl_env_percpu_put);
 
-struct lu_env *cl_env_percpu_get()
+struct lu_env *cl_env_percpu_get(void)
 {
        struct cl_env *cle;
 
        cle = &cl_env_percpu[get_cpu()];
        cl_env_init0(cle, __builtin_return_address(0));
 
-       cl_env_attach(cle);
        return &cle->ce_lu;
 }
 EXPORT_SYMBOL(cl_env_percpu_get);
@@ -1255,68 +1058,34 @@ struct cl_thread_info *cl_env_info(const struct lu_env *env)
         return lu_context_key_get(&env->le_ctx, &cl_key);
 }
 
-/* defines cl0_key_{init,fini}() */
-LU_KEY_INIT_FINI(cl0, struct cl_thread_info);
-
-static void *cl_key_init(const struct lu_context *ctx,
-                         struct lu_context_key *key)
-{
-        struct cl_thread_info *info;
-
-        info = cl0_key_init(ctx, key);
-        if (!IS_ERR(info)) {
-               size_t i;
-
-                for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
-                        lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
-        }
-        return info;
-}
-
-static void cl_key_fini(const struct lu_context *ctx,
-                        struct lu_context_key *key, void *data)
-{
-       struct cl_thread_info *info;
-       size_t i;
-
-        info = data;
-        for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
-                lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
-        cl0_key_fini(ctx, key, data);
-}
-
-static void cl_key_exit(const struct lu_context *ctx,
-                        struct lu_context_key *key, void *data)
-{
-       struct cl_thread_info *info = data;
-       size_t i;
-
-        for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) {
-                LASSERT(info->clt_counters[i].ctc_nr_held == 0);
-                LASSERT(info->clt_counters[i].ctc_nr_used == 0);
-                LASSERT(info->clt_counters[i].ctc_nr_locks_acquired == 0);
-                LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
-                lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
-                lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
-        }
-}
+/* defines cl_key_{init,fini}() */
+LU_KEY_INIT_FINI(cl, struct cl_thread_info);
 
 static struct lu_context_key cl_key = {
         .lct_tags = LCT_CL_THREAD,
         .lct_init = cl_key_init,
         .lct_fini = cl_key_fini,
-        .lct_exit = cl_key_exit
 };
 
 static struct lu_kmem_descr cl_object_caches[] = {
-        {
-                .ckd_cache = &cl_env_kmem,
-                .ckd_name  = "cl_env_kmem",
-                .ckd_size  = sizeof (struct cl_env)
-        },
-        {
-                .ckd_cache = NULL
-        }
+       {
+               .ckd_cache = &cl_env_kmem,
+               .ckd_name  = "cl_env_kmem",
+               .ckd_size  = sizeof(struct cl_env)
+       },
+       {
+               .ckd_cache = &cl_dio_aio_kmem,
+               .ckd_name  = "cl_dio_aio_kmem",
+               .ckd_size  = sizeof(struct cl_dio_aio)
+       },
+       {
+               .ckd_cache = &cl_sub_dio_kmem,
+               .ckd_name  = "cl_sub_dio_kmem",
+               .ckd_size  = sizeof(struct cl_sub_dio)
+       },
+       {
+               .ckd_cache = NULL
+       }
 };
 
 /**
@@ -1329,35 +1098,33 @@ int cl_global_init(void)
 {
        int result;
 
-       INIT_LIST_HEAD(&cl_envs);
+       OBD_ALLOC_PTR_ARRAY(cl_envs, num_possible_cpus());
+       if (cl_envs == NULL)
+               GOTO(out, result = -ENOMEM);
 
-       result = cl_env_store_init();
+       result = lu_kmem_init(cl_object_caches);
        if (result)
-               return result;
+               GOTO(out_envs, result);
 
-        result = lu_kmem_init(cl_object_caches);
-        if (result)
-                goto out_store;
-
-        LU_CONTEXT_KEY_INIT(&cl_key);
-        result = lu_context_key_register(&cl_key);
-        if (result)
-                goto out_kmem;
+       LU_CONTEXT_KEY_INIT(&cl_key);
+       result = lu_context_key_register(&cl_key);
+       if (result)
+               GOTO(out_kmem, result);
 
        result = cl_env_percpu_init();
-       if (result)
-               /* no cl_env_percpu_fini on error */
-               goto out_context;
+       if (result) /* no cl_env_percpu_fini on error */
+               GOTO(out_keys, result);
 
-        return 0;
+       return 0;
 
-out_context:
-        lu_context_key_degister(&cl_key);
+out_keys:
+       lu_context_key_degister(&cl_key);
 out_kmem:
-        lu_kmem_fini(cl_object_caches);
-out_store:
-        cl_env_store_fini();
-        return result;
+       lu_kmem_fini(cl_object_caches);
+out_envs:
+       OBD_FREE_PTR_ARRAY(cl_envs, num_possible_cpus());
+out:
+       return result;
 }
 
 /**
@@ -1365,8 +1132,16 @@ out_store:
  */
 void cl_global_fini(void)
 {
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
+               if (cl_page_kmem_array[i]) {
+                       kmem_cache_destroy(cl_page_kmem_array[i]);
+                       cl_page_kmem_array[i] = NULL;
+               }
+       }
        cl_env_percpu_fini();
-        lu_context_key_degister(&cl_key);
-        lu_kmem_fini(cl_object_caches);
-        cl_env_store_fini();
+       lu_context_key_degister(&cl_key);
+       lu_kmem_fini(cl_object_caches);
+       OBD_FREE_PTR_ARRAY(cl_envs, num_possible_cpus());
 }