Whamcloud - gitweb
LU-2622 obdclass: Remove the global cl_env list
author Prakash Surya <surya1@llnl.gov>
Wed, 30 Jan 2013 00:26:59 +0000 (16:26 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 15 Mar 2013 20:43:48 +0000 (16:43 -0400)
New cl_env structures are allocated using a SLAB cache specifically
created for allocating and freeing these structures. Without this patch,
when a thread is finished with a cl_env structure it places it on a
global list instead of freeing it back to the SLAB cache. With this
patch, this global list is completely removed, and cl_env structures are
released immediately back to the SLAB cache.

The motivation for this change is that this secondary global list cache
is completely unnecessary and serves only to serialize all calls to
cl_env_get and cl_env_put. This serialization has proven to cause a
severe performance impact on systems with large core counts,
specifically during memory reclamation (i.e. ll_releasepage).

For example, on BG/Q Sequoia IO nodes, we've experienced nearly all
68 cores of a machine spinning on the lock protecting this global
list. Some example stack traces showcasing this problem were gathered
using sysrq-l, and are displayed below:

    CPU56:
    Call Trace:
    [c00000000fe3bb30] [c000000000008d1c] .show_stack+0x7c/0x184 (unreliable)
    [c00000000fe3bbe0] [c00000000027604c] .showacpu+0x64/0x94
    [c00000000fe3bc70] [c000000000068b30] .generic_smp_call_function_interrupt+0x10c/0x230
    [c00000000fe3bd40] [c00000000001d11c] .smp_message_recv+0x34/0x78
    [c00000000fe3bdc0] [c00000000002526c] .bgq_ipi_dispatch+0x118/0x18c
    [c00000000fe3be50] [c00000000007b20c] .handle_IRQ_event+0x88/0x18c
    [c00000000fe3bf00] [c00000000007dc90] .handle_percpu_irq+0x8c/0x100
    [c00000000fe3bf90] [c00000000001b808] .call_handle_irq+0x1c/0x2c
    [c0000003e1c4a4c0] [c0000000000059f0] .do_IRQ+0x154/0x1e0
    [c0000003e1c4a570] [c0000000000144dc] exc_external_input_book3e+0x110/0x114
    --- Exception: 501 at ._raw_spin_lock+0xd8/0x1a8
        LR = ._raw_spin_lock+0x104/0x1a8
    [c0000003e1c4a860] [8000000000b04f38] libcfs_nidstrings+0x2acc/0xfffffffffffe5824 [libcfs] (unreliable)
    [c0000003e1c4a910] [c00000000042d4cc] ._spin_lock+0x10/0x24
    [c0000003e1c4a980] [80000000024c2f4c] .cl_env_get+0xec/0x480 [obdclass]
    [c0000003e1c4aa60] [80000000024c336c] .cl_env_nested_get+0x8c/0xf0 [obdclass]
    [c0000003e1c4aaf0] [800000000692070c] .ll_releasepage+0xbc/0x200 [lustre]
    [c0000003e1c4aba0] [c000000000094110] .try_to_release_page+0x68/0x8c
    [c0000003e1c4ac10] [c0000000000a4190] .shrink_page_list.clone.0+0x3d8/0x63c
    [c0000003e1c4adc0] [c0000000000a47d8] .shrink_inactive_list+0x3e4/0x690
    [c0000003e1c4af90] [c0000000000a4f54] .shrink_zone+0x4d0/0x4d4
    [c0000003e1c4b0c0] [c0000000000a5a68] .try_to_free_pages+0x204/0x3d0
    [c0000003e1c4b220] [c00000000009d044] .__alloc_pages_nodemask+0x460/0x738
    [c0000003e1c4b3a0] [c000000000095af4] .grab_cache_page_write_begin+0x7c/0xec
    [c0000003e1c4b450] [8000000006920964] .ll_write_begin+0x94/0x270 [lustre]
    [c0000003e1c4b520] [c0000000000968c8] .generic_file_buffered_write+0x148/0x374
    [c0000003e1c4b660] [c000000000097050] .__generic_file_aio_write+0x374/0x3d8
    [c0000003e1c4b760] [c00000000009712c] .generic_file_aio_write+0x78/0xe8
    [c0000003e1c4b810] [800000000693ed4c] .vvp_io_write_start+0xfc/0x3e0 [lustre]
    [c0000003e1c4b8e0] [80000000024d9c6c] .cl_io_start+0xcc/0x220 [obdclass]
    [c0000003e1c4b980] [80000000024e1a84] .cl_io_loop+0x194/0x2c0 [obdclass]
    [c0000003e1c4ba30] [80000000068ba1d8] .ll_file_io_generic+0x498/0x670 [lustre]
    [c0000003e1c4bb30] [80000000068ba834] .ll_file_aio_write+0x1d4/0x3a0 [lustre]
    [c0000003e1c4bc00] [80000000068bab50] .ll_file_write+0x150/0x320 [lustre]
    [c0000003e1c4bce0] [c0000000000d1ba8] .vfs_write+0xd0/0x1c4
    [c0000003e1c4bd80] [c0000000000d1d98] .SyS_write+0x54/0x98
    [c0000003e1c4be30] [c000000000000580] syscall_exit+0x0/0x2c

    CPU63:
    Call Trace:
    [c00000000fe03b30] [c000000000008d1c] .show_stack+0x7c/0x184 (unreliable)
    [c00000000fe03be0] [c00000000027604c] .showacpu+0x64/0x94
    [c00000000fe03c70] [c000000000068b30] .generic_smp_call_function_interrupt+0x10c/0x230
    [c00000000fe03d40] [c00000000001d11c] .smp_message_recv+0x34/0x78
    [c00000000fe03dc0] [c00000000002526c] .bgq_ipi_dispatch+0x118/0x18c
    [c00000000fe03e50] [c00000000007b20c] .handle_IRQ_event+0x88/0x18c
    [c00000000fe03f00] [c00000000007dc90] .handle_percpu_irq+0x8c/0x100
    [c00000000fe03f90] [c00000000001b808] .call_handle_irq+0x1c/0x2c
    [c0000003c4f0a510] [c0000000000059f0] .do_IRQ+0x154/0x1e0
    [c0000003c4f0a5c0] [c0000000000144dc] exc_external_input_book3e+0x110/0x114
    --- Exception: 501 at ._raw_spin_lock+0xdc/0x1a8
        LR = ._raw_spin_lock+0x104/0x1a8
    [c0000003c4f0a8b0] [800000000697a578] msgdata.87439+0x20/0xfffffffffffccf88 [lustre] (unreliable)
    [c0000003c4f0a960] [c00000000042d4cc] ._spin_lock+0x10/0x24
    [c0000003c4f0a9d0] [80000000024c17e8] .cl_env_put+0x178/0x420 [obdclass]
    [c0000003c4f0aa70] [80000000024c1ab0] .cl_env_nested_put+0x20/0x40 [obdclass]
    [c0000003c4f0aaf0] [8000000006920794] .ll_releasepage+0x144/0x200 [lustre]
    [c0000003c4f0aba0] [c000000000094110] .try_to_release_page+0x68/0x8c
    [c0000003c4f0ac10] [c0000000000a4190] .shrink_page_list.clone.0+0x3d8/0x63c
    [c0000003c4f0adc0] [c0000000000a47d8] .shrink_inactive_list+0x3e4/0x690
    [c0000003c4f0af90] [c0000000000a4f54] .shrink_zone+0x4d0/0x4d4
    [c0000003c4f0b0c0] [c0000000000a5a68] .try_to_free_pages+0x204/0x3d0
    [c0000003c4f0b220] [c00000000009d044] .__alloc_pages_nodemask+0x460/0x738
    [c0000003c4f0b3a0] [c000000000095af4] .grab_cache_page_write_begin+0x7c/0xec
    [c0000003c4f0b450] [8000000006920964] .ll_write_begin+0x94/0x270 [lustre]
    [c0000003c4f0b520] [c0000000000968c8] .generic_file_buffered_write+0x148/0x374
    [c0000003c4f0b660] [c000000000097050] .__generic_file_aio_write+0x374/0x3d8
    [c0000003c4f0b760] [c00000000009712c] .generic_file_aio_write+0x78/0xe8
    [c0000003c4f0b810] [800000000693ed4c] .vvp_io_write_start+0xfc/0x3e0 [lustre]
    [c0000003c4f0b8e0] [80000000024d9c6c] .cl_io_start+0xcc/0x220 [obdclass]
    [c0000003c4f0b980] [80000000024e1a84] .cl_io_loop+0x194/0x2c0 [obdclass]
    [c0000003c4f0ba30] [80000000068ba1d8] .ll_file_io_generic+0x498/0x670 [lustre]
    [c0000003c4f0bb30] [80000000068ba834] .ll_file_aio_write+0x1d4/0x3a0 [lustre]
    [c0000003c4f0bc00] [80000000068bab50] .ll_file_write+0x150/0x320 [lustre]
    [c0000003c4f0bce0] [c0000000000d1ba8] .vfs_write+0xd0/0x1c4
    [c0000003c4f0bd80] [c0000000000d1d98] .SyS_write+0x54/0x98
    [c0000003c4f0be30] [c000000000000580] syscall_exit+0x0/0x2c

Signed-off-by: Prakash Surya <surya1@llnl.gov>
Change-Id: Ief4b524784e07d7677ecb8a9ce97a7b54ccc6f75
Reviewed-on: http://review.whamcloud.com/5204
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/cl_object.h
lustre/liblustre/llite_cl.c
lustre/llite/llite_lib.c
lustre/obdclass/cl_object.c
lustre/obdclass/lu_object.c

index f43bf17..f647b14 100644 (file)
@@ -3263,7 +3263,6 @@ void          *cl_env_reenter    (void);
 void           cl_env_reexit     (void *cookie);
 void           cl_env_implant    (struct lu_env *env, int *refcheck);
 void           cl_env_unplant    (struct lu_env *env, int *refcheck);
-unsigned       cl_env_cache_purge(unsigned nr);
 
 /** @} cl_env */
 
index 7374e4b..337a5ce 100644 (file)
@@ -791,6 +791,5 @@ int cl_sb_fini(struct llu_sb_info *sbi)
          * automatically when last device is destroyed).
          */
         lu_types_stop();
-        cl_env_cache_purge(~0);
         RETURN(0);
 }
index 9ac70e5..27bc963 100644 (file)
@@ -1117,8 +1117,6 @@ void ll_put_super(struct super_block *sb)
 
         lustre_common_put_super(sb);
 
-        cl_env_cache_purge(~0);
-
         cfs_module_put(THIS_MODULE);
 
         EXIT;
index a7692e1..1eb8e76 100644 (file)
@@ -533,12 +533,6 @@ EXPORT_SYMBOL(cl_site_stats_print);
  * bz20044, bz22683.
  */
 
-static CFS_LIST_HEAD(cl_envs);
-static unsigned cl_envs_cached_nr  = 0;
-static unsigned cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
-                                           * for now. */
-static DEFINE_SPINLOCK(cl_envs_guard);
-
 struct cl_env {
         void             *ce_magic;
         struct lu_env     ce_lu;
@@ -793,40 +787,6 @@ static void cl_env_fini(struct cl_env *cle)
         OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
 }
 
-static struct lu_env *cl_env_obtain(void *debug)
-{
-       struct cl_env *cle;
-       struct lu_env *env;
-
-       ENTRY;
-       spin_lock(&cl_envs_guard);
-       LASSERT(equi(cl_envs_cached_nr == 0, cfs_list_empty(&cl_envs)));
-       if (cl_envs_cached_nr > 0) {
-               int rc;
-
-               cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
-               cfs_list_del_init(&cle->ce_linkage);
-               cl_envs_cached_nr--;
-               spin_unlock(&cl_envs_guard);
-
-                env = &cle->ce_lu;
-                rc = lu_env_refill(env);
-                if (rc == 0) {
-                        cl_env_init0(cle, debug);
-                        lu_context_enter(&env->le_ctx);
-                        lu_context_enter(&cle->ce_ses);
-                } else {
-                        cl_env_fini(cle);
-                        env = ERR_PTR(rc);
-                }
-        } else {
-               spin_unlock(&cl_envs_guard);
-               env = cl_env_new(lu_context_tags_default,
-                                lu_session_tags_default, debug);
-       }
-       RETURN(env);
-}
-
 static inline struct cl_env *cl_env_container(struct lu_env *env)
 {
         return container_of(env, struct cl_env, ce_lu);
@@ -858,8 +818,6 @@ EXPORT_SYMBOL(cl_env_peek);
  * Returns lu_env: if there already is an environment associated with the
  * current thread, it is returned, otherwise, new environment is allocated.
  *
- * Allocations are amortized through the global cache of environments.
- *
  * \param refcheck pointer to a counter used to detect environment leaks. In
  * the usual case cl_env_get() and cl_env_put() are called in the same lexical
  * scope and pointer to the same integer is passed as \a refcheck. This is
@@ -873,7 +831,10 @@ struct lu_env *cl_env_get(int *refcheck)
 
         env = cl_env_peek(refcheck);
         if (env == NULL) {
-                env = cl_env_obtain(__builtin_return_address(0));
+               env = cl_env_new(lu_context_tags_default,
+                                lu_session_tags_default,
+                                __builtin_return_address(0));
+
                 if (!IS_ERR(env)) {
                         struct cl_env *cle;
 
@@ -917,33 +878,6 @@ static void cl_env_exit(struct cl_env *cle)
 }
 
 /**
- * Finalizes and frees a given number of cached environments. This is done to
- * (1) free some memory (not currently hooked into VM), or (2) release
- * references to modules.
- */
-unsigned cl_env_cache_purge(unsigned nr)
-{
-       struct cl_env *cle;
-
-       ENTRY;
-       spin_lock(&cl_envs_guard);
-       for (; !cfs_list_empty(&cl_envs) && nr > 0; --nr) {
-               cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
-               cfs_list_del_init(&cle->ce_linkage);
-               LASSERT(cl_envs_cached_nr > 0);
-               cl_envs_cached_nr--;
-               spin_unlock(&cl_envs_guard);
-
-               cl_env_fini(cle);
-               spin_lock(&cl_envs_guard);
-       }
-       LASSERT(equi(cl_envs_cached_nr == 0, cfs_list_empty(&cl_envs)));
-       spin_unlock(&cl_envs_guard);
-       RETURN(nr);
-}
-EXPORT_SYMBOL(cl_env_cache_purge);
-
-/**
  * Release an environment.
  *
  * Decrement \a env reference counter. When counter drops to 0, nothing in
@@ -965,21 +899,7 @@ void cl_env_put(struct lu_env *env, int *refcheck)
                 cl_env_detach(cle);
                 cle->ce_debug = NULL;
                 cl_env_exit(cle);
-                /*
-                 * Don't bother to take a lock here.
-                 *
-                 * Return environment to the cache only when it was allocated
-                 * with the standard tags.
-                 */
-                if (cl_envs_cached_nr < cl_envs_cached_max &&
-                    (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
-                    (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
-                       spin_lock(&cl_envs_guard);
-                       cfs_list_add(&cle->ce_linkage, &cl_envs);
-                       cl_envs_cached_nr++;
-                       spin_unlock(&cl_envs_guard);
-               } else
-                       cl_env_fini(cle);
+               cl_env_fini(cle);
        }
 }
 EXPORT_SYMBOL(cl_env_put);
index fd8c2d7..41add70 100644 (file)
@@ -1491,13 +1491,11 @@ static CFS_LIST_HEAD(lu_context_remembered);
 void lu_context_key_quiesce(struct lu_context_key *key)
 {
         struct lu_context *ctx;
-        extern unsigned cl_env_cache_purge(unsigned nr);
 
         if (!(key->lct_tags & LCT_QUIESCENT)) {
                 /*
                  * XXX layering violation.
                  */
-                cl_env_cache_purge(~0);
                 key->lct_tags |= LCT_QUIESCENT;
                 /*
                  * XXX memory barrier has to go here.