From 03364148c01ffe7bb2f2abfde84bbc36dc4d8237 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Thu, 16 Jan 2020 15:05:08 +0800
Subject: [PATCH] LU-13134 obdclass: use slab allocation for cl_page

Currently we use kmalloc() for cl_page allocation because the cl_page
size can differ between objects. In most cases (except for the obdecho
object) cl_page is 408 bytes, for which kmalloc() uses the 512-byte
slab. So with a 4KB PAGE_SIZE we can only fit 8 cl_page allocations in
a single page.

With a private slab cache, 10 cl_page fit in a 4KB PAGE_SIZE, and even
more struct cl_page would fit if the struct size is reduced further.

For 4GB of pages the cl_page memory usage will shrink from 512M to
408M. With clients already having 100GB of memory, this saves about
2.5GB of cl_page memory, and some systems have as much as 24TB today!

This patch uses a static array with N (currently 16) entries storing
{ size, slab pointer } pairs, of which only 2-3 entries are expected
to be in use. If the static array is not large enough, allocation
falls back to kmalloc().

Benchmark numbers:

4KB Random Read
fio -iodepth=128 -direct=1 -size=2g -runtime=60 -numjobs=256
    -group_reporting -directory=/ai400/out -create_serialize=0
    -filename_format='f.$jobnum.$filenum'

                      master+      master+
                      LU-4198      LU-4198+LU-13134    delta
QD=1, numjobs=1         4518         4538              +0.44%
QD=128, numjobs=256    2177K        2207K              +1.38%

Change-Id: I565b09616b22706f93d4c0fdc0df396d06cd51cc
Signed-off-by: Wang Shilong
Reviewed-on: https://review.whamcloud.com/37225
Reviewed-by: Neil Brown
Tested-by: jenkins
Reviewed-by: Andreas Dilger
Tested-by: Maloo
---
 lustre/include/cl_object.h    |  2 ++
 lustre/obdclass/cl_internal.h |  2 ++
 lustre/obdclass/cl_object.c   | 10 ++++++
 lustre/obdclass/cl_page.c     | 75 ++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index d1e8f7a..a9d8a67 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -729,6 +729,8 @@ enum cl_page_type {
 struct cl_page {
         /** Reference counter. */
         atomic_t cp_ref;
+        /* which slab kmem index this memory allocated from */
+        int cp_kmem_index;
         /** An object this page is a part of. Immutable after creation. */
         struct cl_object *cp_obj;
         /** vmpage */
diff --git a/lustre/obdclass/cl_internal.h b/lustre/obdclass/cl_internal.h
index 8ef48ef..33bdddc 100644
--- a/lustre/obdclass/cl_internal.h
+++ b/lustre/obdclass/cl_internal.h
@@ -47,6 +47,8 @@ struct cl_thread_info {
 };
 
 extern struct kmem_cache *cl_dio_aio_kmem;
+extern struct kmem_cache *cl_page_kmem_array[16];
+extern unsigned short cl_page_kmem_size_array[16];
 
 struct cl_thread_info *cl_env_info(const struct lu_env *env);
 void cl_page_disown0(const struct lu_env *env,
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c
index 1e39da3..9844ed0 100644
--- a/lustre/obdclass/cl_object.c
+++ b/lustre/obdclass/cl_object.c
@@ -58,6 +58,8 @@
 
 static struct kmem_cache *cl_env_kmem;
 struct kmem_cache *cl_dio_aio_kmem;
+struct kmem_cache *cl_page_kmem_array[16];
+unsigned short cl_page_kmem_size_array[16];
 
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
@@ -1106,6 +1108,14 @@ out:
  */
 void cl_global_fini(void)
 {
+        int i;
+
+        for (i = 0; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
+                if (cl_page_kmem_array[i]) {
+                        kmem_cache_destroy(cl_page_kmem_array[i]);
+                        cl_page_kmem_array[i] = NULL;
+                }
+        }
         cl_env_percpu_fini();
         lu_context_key_degister(&cl_key);
         lu_kmem_fini(cl_object_caches);
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c
index de21f79..e9d2b84 100644
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -46,6 +46,7 @@
 #include "cl_internal.h"
 
 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
+static DEFINE_MUTEX(cl_page_kmem_mutex);
 
 #ifdef LIBCFS_DEBUG
 # define PASSERT(env, page, expr)                                       \
@@ -142,11 +143,24 @@ cl_page_at_trusted(const struct cl_page *page,
         RETURN(NULL);
 }
 
+static void __cl_page_free(struct cl_page *cl_page, unsigned short bufsize)
+{
+        int index = cl_page->cp_kmem_index;
+
+        if (index >= 0) {
+                LASSERT(index < ARRAY_SIZE(cl_page_kmem_array));
+                LASSERT(cl_page_kmem_size_array[index] == bufsize);
+                OBD_SLAB_FREE(cl_page, cl_page_kmem_array[index], bufsize);
+        } else {
+                OBD_FREE(cl_page, bufsize);
+        }
+}
+
 static void cl_page_free(const struct lu_env *env, struct cl_page *page,
                          struct pagevec *pvec)
 {
         struct cl_object *obj = page->cp_obj;
-        int pagesize = cl_object_header(obj)->coh_page_bufsize;
+        unsigned short bufsize = cl_object_header(obj)->coh_page_bufsize;
 
         PASSERT(env, page, list_empty(&page->cp_batch));
         PASSERT(env, page, page->cp_owner == NULL);
@@ -167,7 +181,7 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page,
         lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
         cl_object_put(env, obj);
         lu_ref_fini(&page->cp_reference);
-        OBD_FREE(page, pagesize);
+        __cl_page_free(page, bufsize);
         EXIT;
 }
 
@@ -182,6 +196,59 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
         *(enum cl_page_state *)&page->cp_state = state;
 }
 
+static struct cl_page *__cl_page_alloc(struct cl_object *o)
+{
+        int i = 0;
+        struct cl_page *cl_page = NULL;
+        unsigned short bufsize = cl_object_header(o)->coh_page_bufsize;
+
+check:
+        /* the number of entries in cl_page_kmem_array is expected to
+         * only be 2-3 entries, so the lookup overhead should be low.
+         */
+        for ( ; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
+                if (smp_load_acquire(&cl_page_kmem_size_array[i])
+                    == bufsize) {
+                        OBD_SLAB_ALLOC_GFP(cl_page, cl_page_kmem_array[i],
+                                           bufsize, GFP_NOFS);
+                        if (cl_page)
+                                cl_page->cp_kmem_index = i;
+                        return cl_page;
+                }
+                if (cl_page_kmem_size_array[i] == 0)
+                        break;
+        }
+
+        if (i < ARRAY_SIZE(cl_page_kmem_array)) {
+                char cache_name[32];
+
+                mutex_lock(&cl_page_kmem_mutex);
+                if (cl_page_kmem_size_array[i]) {
+                        mutex_unlock(&cl_page_kmem_mutex);
+                        goto check;
+                }
+                snprintf(cache_name, sizeof(cache_name),
+                         "cl_page_kmem-%u", bufsize);
+                cl_page_kmem_array[i] =
+                        kmem_cache_create(cache_name, bufsize,
+                                          0, 0, NULL);
+                if (cl_page_kmem_array[i] == NULL) {
+                        mutex_unlock(&cl_page_kmem_mutex);
+                        return NULL;
+                }
+                smp_store_release(&cl_page_kmem_size_array[i],
+                                  bufsize);
+                mutex_unlock(&cl_page_kmem_mutex);
+                goto check;
+        } else {
+                OBD_ALLOC_GFP(cl_page, bufsize, GFP_NOFS);
+                if (cl_page)
+                        cl_page->cp_kmem_index = -1;
+        }
+
+        return cl_page;
+}
+
 struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                               pgoff_t ind, struct page *vmpage,
                               enum cl_page_type type)
@@ -190,8 +257,8 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
         struct lu_object_header *head;
 
         ENTRY;
-        OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
-                      GFP_NOFS);
+
+        page = __cl_page_alloc(o);
         if (page != NULL) {
                 int result = 0;
                 atomic_set(&page->cp_ref, 1);
-- 
1.8.3.1
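
The allocation path in __cl_page_alloc() above is a lock-free lookup over a
fixed array of per-size caches, with a mutex taken only on the slow path that
creates a new cache. For readers outside the Lustre tree, the following
stand-alone user-space sketch illustrates the same pattern. The sized_alloc()
and cache_* names, the use of C11 atomics and a pthread mutex, and plain
calloc() in place of kmem_cache/OBD_SLAB allocations are illustrative
assumptions only, not part of the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHE_SLOTS 16

/* published object size per slot; 0 means the slot is unused */
static _Atomic unsigned short cache_size_array[CACHE_SLOTS];
/* per-slot cache name, standing in for a struct kmem_cache pointer */
static char cache_name_array[CACHE_SLOTS][32];
static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Allocate a bufsize-byte object, creating a per-size "cache" on first use.
 * *index plays the role of cp_kmem_index: the slot used, or -1 when every
 * slot is taken and the plain-allocation fallback is used instead. */
static void *sized_alloc(unsigned short bufsize, int *index)
{
        int i = 0;

check:
        for ( ; i < CACHE_SLOTS; i++) {
                /* acquire pairs with the release below: once the size is
                 * visible, the slot is fully initialized */
                if (atomic_load_explicit(&cache_size_array[i],
                                         memory_order_acquire) == bufsize) {
                        *index = i;
                        return calloc(1, bufsize);  /* "slab" allocation */
                }
                if (cache_size_array[i] == 0)
                        break;                      /* first free slot */
        }

        if (i < CACHE_SLOTS) {
                pthread_mutex_lock(&cache_mutex);
                if (cache_size_array[i]) {          /* lost the race, rescan */
                        pthread_mutex_unlock(&cache_mutex);
                        goto check;
                }
                snprintf(cache_name_array[i], sizeof(cache_name_array[i]),
                         "cl_page_kmem-%u", bufsize);
                /* publish the size only after the slot is set up */
                atomic_store_explicit(&cache_size_array[i], bufsize,
                                      memory_order_release);
                pthread_mutex_unlock(&cache_mutex);
                goto check;
        }

        *index = -1;                                /* array full: fallback */
        return calloc(1, bufsize);
}

int main(void)
{
        int idx;
        void *obj = sized_alloc(408, &idx);

        printf("408-byte object from slot %d\n", idx);
        free(obj);
        return 0;
}

The ordering point carried over from the patch is that the size for slot i is
only published, with release semantics, after the cache for that slot is fully
set up, so a reader that observes a matching size with acquire semantics can
use the cache without ever taking the mutex.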