From d5390bbde9b525b13ab91f5610abe1c4bb98eff9 Mon Sep 17 00:00:00 2001 From: Prakash Surya Date: Tue, 29 Jan 2013 16:26:59 -0800 Subject: [PATCH 1/1] LU-2622 obdclass: Remove the global cl_env list New cl_env structures are allocated using a SLAB cache specifically created for allocating and freeing these structures. Without this patch, when a thread is finished with a cl_env structure it places it on a global list instead of freeing it back to the SLAB cache. With this patch, this global list is completely removed, and cl_env structures are released immediately back to the SLAB cache. The motivation for this change essentially boils down to this secondary global list cache being completely unnecessary, and only serving to serialize any calls to cl_env_get and cl_env_put. This has proven to cause a severe performance impact on large core count systems, specifically during memory reclamation (i.e. ll_releasepage). For example, on BG/Q Sequoia IO nodes, we've experienced nearly all 68 cores of a machine spinning on the lock protecting this global list. 
Some example stack traces showcasing this problem were gathered using sysrq-l, and are displayed below: CPU56: Call Trace: [c00000000fe3bb30] [c000000000008d1c] .show_stack+0x7c/0x184 (unreliable) [c00000000fe3bbe0] [c00000000027604c] .showacpu+0x64/0x94 [c00000000fe3bc70] [c000000000068b30] .generic_smp_call_function_interrupt+0x10c/0x230 [c00000000fe3bd40] [c00000000001d11c] .smp_message_recv+0x34/0x78 [c00000000fe3bdc0] [c00000000002526c] .bgq_ipi_dispatch+0x118/0x18c [c00000000fe3be50] [c00000000007b20c] .handle_IRQ_event+0x88/0x18c [c00000000fe3bf00] [c00000000007dc90] .handle_percpu_irq+0x8c/0x100 [c00000000fe3bf90] [c00000000001b808] .call_handle_irq+0x1c/0x2c [c0000003e1c4a4c0] [c0000000000059f0] .do_IRQ+0x154/0x1e0 [c0000003e1c4a570] [c0000000000144dc] exc_external_input_book3e+0x110/0x114 --- Exception: 501 at ._raw_spin_lock+0xd8/0x1a8 LR = ._raw_spin_lock+0x104/0x1a8 [c0000003e1c4a860] [8000000000b04f38] libcfs_nidstrings+0x2acc/0xfffffffffffe5824 [libcfs] (unreliable) [c0000003e1c4a910] [c00000000042d4cc] ._spin_lock+0x10/0x24 [c0000003e1c4a980] [80000000024c2f4c] .cl_env_get+0xec/0x480 [obdclass] [c0000003e1c4aa60] [80000000024c336c] .cl_env_nested_get+0x8c/0xf0 [obdclass] [c0000003e1c4aaf0] [800000000692070c] .ll_releasepage+0xbc/0x200 [lustre] [c0000003e1c4aba0] [c000000000094110] .try_to_release_page+0x68/0x8c [c0000003e1c4ac10] [c0000000000a4190] .shrink_page_list.clone.0+0x3d8/0x63c [c0000003e1c4adc0] [c0000000000a47d8] .shrink_inactive_list+0x3e4/0x690 [c0000003e1c4af90] [c0000000000a4f54] .shrink_zone+0x4d0/0x4d4 [c0000003e1c4b0c0] [c0000000000a5a68] .try_to_free_pages+0x204/0x3d0 [c0000003e1c4b220] [c00000000009d044] .__alloc_pages_nodemask+0x460/0x738 [c0000003e1c4b3a0] [c000000000095af4] .grab_cache_page_write_begin+0x7c/0xec [c0000003e1c4b450] [8000000006920964] .ll_write_begin+0x94/0x270 [lustre] [c0000003e1c4b520] [c0000000000968c8] .generic_file_buffered_write+0x148/0x374 [c0000003e1c4b660] [c000000000097050] 
.__generic_file_aio_write+0x374/0x3d8 [c0000003e1c4b760] [c00000000009712c] .generic_file_aio_write+0x78/0xe8 [c0000003e1c4b810] [800000000693ed4c] .vvp_io_write_start+0xfc/0x3e0 [lustre] [c0000003e1c4b8e0] [80000000024d9c6c] .cl_io_start+0xcc/0x220 [obdclass] [c0000003e1c4b980] [80000000024e1a84] .cl_io_loop+0x194/0x2c0 [obdclass] [c0000003e1c4ba30] [80000000068ba1d8] .ll_file_io_generic+0x498/0x670 [lustre] [c0000003e1c4bb30] [80000000068ba834] .ll_file_aio_write+0x1d4/0x3a0 [lustre] [c0000003e1c4bc00] [80000000068bab50] .ll_file_write+0x150/0x320 [lustre] [c0000003e1c4bce0] [c0000000000d1ba8] .vfs_write+0xd0/0x1c4 [c0000003e1c4bd80] [c0000000000d1d98] .SyS_write+0x54/0x98 [c0000003e1c4be30] [c000000000000580] syscall_exit+0x0/0x2c CPU63: Call Trace: [c00000000fe03b30] [c000000000008d1c] .show_stack+0x7c/0x184 (unreliable) [c00000000fe03be0] [c00000000027604c] .showacpu+0x64/0x94 [c00000000fe03c70] [c000000000068b30] .generic_smp_call_function_interrupt+0x10c/0x230 [c00000000fe03d40] [c00000000001d11c] .smp_message_recv+0x34/0x78 [c00000000fe03dc0] [c00000000002526c] .bgq_ipi_dispatch+0x118/0x18c [c00000000fe03e50] [c00000000007b20c] .handle_IRQ_event+0x88/0x18c [c00000000fe03f00] [c00000000007dc90] .handle_percpu_irq+0x8c/0x100 [c00000000fe03f90] [c00000000001b808] .call_handle_irq+0x1c/0x2c [c0000003c4f0a510] [c0000000000059f0] .do_IRQ+0x154/0x1e0 [c0000003c4f0a5c0] [c0000000000144dc] exc_external_input_book3e+0x110/0x114 --- Exception: 501 at ._raw_spin_lock+0xdc/0x1a8 LR = ._raw_spin_lock+0x104/0x1a8 [c0000003c4f0a8b0] [800000000697a578] msgdata.87439+0x20/0xfffffffffffccf88 [lustre] (unreliable) [c0000003c4f0a960] [c00000000042d4cc] ._spin_lock+0x10/0x24 [c0000003c4f0a9d0] [80000000024c17e8] .cl_env_put+0x178/0x420 [obdclass] [c0000003c4f0aa70] [80000000024c1ab0] .cl_env_nested_put+0x20/0x40 [obdclass] [c0000003c4f0aaf0] [8000000006920794] .ll_releasepage+0x144/0x200 [lustre] [c0000003c4f0aba0] [c000000000094110] .try_to_release_page+0x68/0x8c 
[c0000003c4f0ac10] [c0000000000a4190] .shrink_page_list.clone.0+0x3d8/0x63c [c0000003c4f0adc0] [c0000000000a47d8] .shrink_inactive_list+0x3e4/0x690 [c0000003c4f0af90] [c0000000000a4f54] .shrink_zone+0x4d0/0x4d4 [c0000003c4f0b0c0] [c0000000000a5a68] .try_to_free_pages+0x204/0x3d0 [c0000003c4f0b220] [c00000000009d044] .__alloc_pages_nodemask+0x460/0x738 [c0000003c4f0b3a0] [c000000000095af4] .grab_cache_page_write_begin+0x7c/0xec [c0000003c4f0b450] [8000000006920964] .ll_write_begin+0x94/0x270 [lustre] [c0000003c4f0b520] [c0000000000968c8] .generic_file_buffered_write+0x148/0x374 [c0000003c4f0b660] [c000000000097050] .__generic_file_aio_write+0x374/0x3d8 [c0000003c4f0b760] [c00000000009712c] .generic_file_aio_write+0x78/0xe8 [c0000003c4f0b810] [800000000693ed4c] .vvp_io_write_start+0xfc/0x3e0 [lustre] [c0000003c4f0b8e0] [80000000024d9c6c] .cl_io_start+0xcc/0x220 [obdclass] [c0000003c4f0b980] [80000000024e1a84] .cl_io_loop+0x194/0x2c0 [obdclass] [c0000003c4f0ba30] [80000000068ba1d8] .ll_file_io_generic+0x498/0x670 [lustre] [c0000003c4f0bb30] [80000000068ba834] .ll_file_aio_write+0x1d4/0x3a0 [lustre] [c0000003c4f0bc00] [80000000068bab50] .ll_file_write+0x150/0x320 [lustre] [c0000003c4f0bce0] [c0000000000d1ba8] .vfs_write+0xd0/0x1c4 [c0000003c4f0bd80] [c0000000000d1d98] .SyS_write+0x54/0x98 [c0000003c4f0be30] [c000000000000580] syscall_exit+0x0/0x2c Signed-off-by: Prakash Surya Change-Id: Ief4b524784e07d7677ecb8a9ce97a7b54ccc6f75 Reviewed-on: http://review.whamcloud.com/5204 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Keith Mannthey Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 1 - lustre/liblustre/llite_cl.c | 1 - lustre/llite/llite_lib.c | 2 - lustre/obdclass/cl_object.c | 90 +++------------------------------------------ lustre/obdclass/lu_object.c | 2 - 5 files changed, 5 insertions(+), 91 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index f43bf17..f647b14 
100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -3263,7 +3263,6 @@ void *cl_env_reenter (void); void cl_env_reexit (void *cookie); void cl_env_implant (struct lu_env *env, int *refcheck); void cl_env_unplant (struct lu_env *env, int *refcheck); -unsigned cl_env_cache_purge(unsigned nr); /** @} cl_env */ diff --git a/lustre/liblustre/llite_cl.c b/lustre/liblustre/llite_cl.c index 7374e4b..337a5ce 100644 --- a/lustre/liblustre/llite_cl.c +++ b/lustre/liblustre/llite_cl.c @@ -791,6 +791,5 @@ int cl_sb_fini(struct llu_sb_info *sbi) * automatically when last device is destroyed). */ lu_types_stop(); - cl_env_cache_purge(~0); RETURN(0); } diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 9ac70e5..27bc963 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1117,8 +1117,6 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); - cl_env_cache_purge(~0); - cfs_module_put(THIS_MODULE); EXIT; diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c index a7692e1..1eb8e76 100644 --- a/lustre/obdclass/cl_object.c +++ b/lustre/obdclass/cl_object.c @@ -533,12 +533,6 @@ EXPORT_SYMBOL(cl_site_stats_print); * bz20044, bz22683. */ -static CFS_LIST_HEAD(cl_envs); -static unsigned cl_envs_cached_nr = 0; -static unsigned cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit - * for now. 
*/ -static DEFINE_SPINLOCK(cl_envs_guard); - struct cl_env { void *ce_magic; struct lu_env ce_lu; @@ -793,40 +787,6 @@ static void cl_env_fini(struct cl_env *cle) OBD_SLAB_FREE_PTR(cle, cl_env_kmem); } -static struct lu_env *cl_env_obtain(void *debug) -{ - struct cl_env *cle; - struct lu_env *env; - - ENTRY; - spin_lock(&cl_envs_guard); - LASSERT(equi(cl_envs_cached_nr == 0, cfs_list_empty(&cl_envs))); - if (cl_envs_cached_nr > 0) { - int rc; - - cle = container_of(cl_envs.next, struct cl_env, ce_linkage); - cfs_list_del_init(&cle->ce_linkage); - cl_envs_cached_nr--; - spin_unlock(&cl_envs_guard); - - env = &cle->ce_lu; - rc = lu_env_refill(env); - if (rc == 0) { - cl_env_init0(cle, debug); - lu_context_enter(&env->le_ctx); - lu_context_enter(&cle->ce_ses); - } else { - cl_env_fini(cle); - env = ERR_PTR(rc); - } - } else { - spin_unlock(&cl_envs_guard); - env = cl_env_new(lu_context_tags_default, - lu_session_tags_default, debug); - } - RETURN(env); -} - static inline struct cl_env *cl_env_container(struct lu_env *env) { return container_of(env, struct cl_env, ce_lu); @@ -858,8 +818,6 @@ EXPORT_SYMBOL(cl_env_peek); * Returns lu_env: if there already is an environment associated with the * current thread, it is returned, otherwise, new environment is allocated. * - * Allocations are amortized through the global cache of environments. - * * \param refcheck pointer to a counter used to detect environment leaks. In * the usual case cl_env_get() and cl_env_put() are called in the same lexical * scope and pointer to the same integer is passed as \a refcheck. 
This is @@ -873,7 +831,10 @@ struct lu_env *cl_env_get(int *refcheck) env = cl_env_peek(refcheck); if (env == NULL) { - env = cl_env_obtain(__builtin_return_address(0)); + env = cl_env_new(lu_context_tags_default, + lu_session_tags_default, + __builtin_return_address(0)); + if (!IS_ERR(env)) { struct cl_env *cle; @@ -917,33 +878,6 @@ static void cl_env_exit(struct cl_env *cle) } /** - * Finalizes and frees a given number of cached environments. This is done to - * (1) free some memory (not currently hooked into VM), or (2) release - * references to modules. - */ -unsigned cl_env_cache_purge(unsigned nr) -{ - struct cl_env *cle; - - ENTRY; - spin_lock(&cl_envs_guard); - for (; !cfs_list_empty(&cl_envs) && nr > 0; --nr) { - cle = container_of(cl_envs.next, struct cl_env, ce_linkage); - cfs_list_del_init(&cle->ce_linkage); - LASSERT(cl_envs_cached_nr > 0); - cl_envs_cached_nr--; - spin_unlock(&cl_envs_guard); - - cl_env_fini(cle); - spin_lock(&cl_envs_guard); - } - LASSERT(equi(cl_envs_cached_nr == 0, cfs_list_empty(&cl_envs))); - spin_unlock(&cl_envs_guard); - RETURN(nr); -} -EXPORT_SYMBOL(cl_env_cache_purge); - -/** * Release an environment. * * Decrement \a env reference counter. When counter drops to 0, nothing in @@ -965,21 +899,7 @@ void cl_env_put(struct lu_env *env, int *refcheck) cl_env_detach(cle); cle->ce_debug = NULL; cl_env_exit(cle); - /* - * Don't bother to take a lock here. - * - * Return environment to the cache only when it was allocated - * with the standard tags. 
- */ - if (cl_envs_cached_nr < cl_envs_cached_max && - (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD && - (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) { - spin_lock(&cl_envs_guard); - cfs_list_add(&cle->ce_linkage, &cl_envs); - cl_envs_cached_nr++; - spin_unlock(&cl_envs_guard); - } else - cl_env_fini(cle); + cl_env_fini(cle); } } EXPORT_SYMBOL(cl_env_put); diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index fd8c2d7..41add70 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -1491,13 +1491,11 @@ static CFS_LIST_HEAD(lu_context_remembered); void lu_context_key_quiesce(struct lu_context_key *key) { struct lu_context *ctx; - extern unsigned cl_env_cache_purge(unsigned nr); if (!(key->lct_tags & LCT_QUIESCENT)) { /* * XXX layering violation. */ - cl_env_cache_purge(~0); key->lct_tags |= LCT_QUIESCENT; /* * XXX memory barrier has to go here. -- 1.8.3.1