LU-13134 obdclass: use slab allocation for cl_page
author    Wang Shilong <wshilong@ddn.com>
          Thu, 16 Jan 2020 07:05:08 +0000 (15:05 +0800)
committer Oleg Drokin <green@whamcloud.com>
          Tue, 7 Apr 2020 17:18:58 +0000 (17:18 +0000)
Currently we use kmalloc() for cl_page allocation because the size of
cl_page can differ between object types.

In most cases (everything except the obdecho object) cl_page is 408
bytes, and kmalloc() serves it from the 512-byte slab, so with a 4KB
PAGE_SIZE only 8 cl_page allocations fit in a single page.

With a private slab cache, 10 cl_page allocations fit in a 4KB page,
and even more would fit if struct cl_page is shrunk further. For 4GB
worth of cached pages, cl_page memory usage drops from 512MB to 408MB.

With clients already having 100GB of RAM, this saves about 2.5GB of
cl_page memory, and some systems have as much as 24TB today!
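
A quick back-of-the-envelope check of the numbers above (a standalone
userspace sketch, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long long page_size  = 4096; /* 4KB PAGE_SIZE */
        const unsigned long long cl_page_sz = 408;  /* common cl_page size */
        const unsigned long long kmalloc_sz = 512;  /* kmalloc bucket used today */
        unsigned long long npages;

        /* cl_page objects packed into one 4KB page */
        printf("kmalloc-512:  %llu per page\n", page_size / kmalloc_sz); /* 8 */
        printf("private slab: %llu per page\n", page_size / cl_page_sz); /* 10 */

        /* cl_page footprint for 4GB worth of cached pages */
        npages = (4ULL << 30) / page_size;                       /* 1M pages */
        printf("kmalloc: %llu MB\n", npages * kmalloc_sz >> 20); /* 512 MB */
        printf("slab:    %llu MB\n", npages * cl_page_sz >> 20); /* 408 MB */

        /* savings on a client caching 100GB of data */
        npages = (100ULL << 30) / page_size;
        printf("saved:   %.1f GB\n",
               (double)(npages * (kmalloc_sz - cl_page_sz)) / (1ULL << 30));
        return 0;                                                /* ~2.5 GB */
    }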

This patch uses a static array with N (currently 16) entries, each
holding a { size, slab pointer } pair; only 2-3 entries are expected
to be in use at any time. If the static array fills up, we fall back
to kmalloc(), as sketched below.
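
In outline, the lookup-or-create path works as follows (a simplified
sketch of the __cl_page_alloc() hunk in the diff below; the helper name
cl_page_kmem_lookup() is ours, and Lustre's OBD allocation wrappers and
error paths are elided — see the note after the diff for why the
acquire/release pairing is safe):

    static struct kmem_cache *cl_page_kmem_array[16];
    static unsigned short cl_page_kmem_size_array[16];
    static DEFINE_MUTEX(cl_page_kmem_mutex);

    /* find (or create) the slab cache serving allocations of @bufsize */
    static struct kmem_cache *cl_page_kmem_lookup(unsigned short bufsize)
    {
        int i = 0;
        char name[32];

    retry:
        for (; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
            /* lockless fast path: a published size means the matching
             * slab pointer is already valid */
            if (smp_load_acquire(&cl_page_kmem_size_array[i]) == bufsize)
                return cl_page_kmem_array[i];
            if (cl_page_kmem_size_array[i] == 0)
                break;                  /* first unused slot */
        }
        if (i >= ARRAY_SIZE(cl_page_kmem_array))
            return NULL;                /* full: fall back to kmalloc() */

        mutex_lock(&cl_page_kmem_mutex);
        if (cl_page_kmem_size_array[i]) {       /* slot claimed meanwhile */
            mutex_unlock(&cl_page_kmem_mutex);
            goto retry;                 /* rescan, it may match now */
        }
        snprintf(name, sizeof(name), "cl_page_kmem-%u", bufsize);
        cl_page_kmem_array[i] = kmem_cache_create(name, bufsize, 0, 0, NULL);
        if (cl_page_kmem_array[i])
            /* publish the size only after the pointer is set */
            smp_store_release(&cl_page_kmem_size_array[i], bufsize);
        mutex_unlock(&cl_page_kmem_mutex);
        return cl_page_kmem_array[i];
    }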

Benchmark numbers:

    4KB Random Read
    fio -iodepth=128 -direct=1 -size=2g -runtime=60 -numjobs=256
        -group_reporting -directory=/ai400/out -create_serialize=0
        -filename_format='f.$jobnum.$filenum'

                            master+    master+
                            LU-4198    LU-4198+
                                       LU-13134    delta
      QD=1,   numjobs=1       4518       4538     +0.44%
      QD=128, numjobs=256     2177K      2207K    +1.38%

Change-Id: I565b09616b22706f93d4c0fdc0df396d06cd51cc
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/37225
Reviewed-by: Neil Brown <neilb@suse.de>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/cl_object.h
lustre/obdclass/cl_internal.h
lustre/obdclass/cl_object.c
lustre/obdclass/cl_page.c

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index d1e8f7a..a9d8a67 100644
@@ -729,6 +729,8 @@ enum cl_page_type {
 struct cl_page {
        /** Reference counter. */
        atomic_t                 cp_ref;
+       /* which slab kmem index this memory allocated from */
+       int                      cp_kmem_index;
        /** An object this page is a part of. Immutable after creation. */
        struct cl_object        *cp_obj;
        /** vmpage */
diff --git a/lustre/obdclass/cl_internal.h b/lustre/obdclass/cl_internal.h
index 8ef48ef..33bdddc 100644
@@ -47,6 +47,8 @@ struct cl_thread_info {
 };
 
 extern struct kmem_cache *cl_dio_aio_kmem;
+extern struct kmem_cache *cl_page_kmem_array[16];
+extern unsigned short cl_page_kmem_size_array[16];
 
 struct cl_thread_info *cl_env_info(const struct lu_env *env);
 void cl_page_disown0(const struct lu_env *env,
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c
index 1e39da3..9844ed0 100644
@@ -58,6 +58,8 @@
 
 static struct kmem_cache *cl_env_kmem;
 struct kmem_cache *cl_dio_aio_kmem;
+struct kmem_cache *cl_page_kmem_array[16];
+unsigned short cl_page_kmem_size_array[16];
 
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
@@ -1106,6 +1108,14 @@ out:
  */
 void cl_global_fini(void)
 {
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
+               if (cl_page_kmem_array[i]) {
+                       kmem_cache_destroy(cl_page_kmem_array[i]);
+                       cl_page_kmem_array[i] = NULL;
+               }
+       }
        cl_env_percpu_fini();
        lu_context_key_degister(&cl_key);
        lu_kmem_fini(cl_object_caches);
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c
index de21f79..e9d2b84 100644
@@ -46,6 +46,7 @@
 #include "cl_internal.h"
 
 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
+static DEFINE_MUTEX(cl_page_kmem_mutex);
 
 #ifdef LIBCFS_DEBUG
 # define PASSERT(env, page, expr)                                       \
@@ -142,11 +143,24 @@ cl_page_at_trusted(const struct cl_page *page,
        RETURN(NULL);
 }
 
+static void __cl_page_free(struct cl_page *cl_page, unsigned short bufsize)
+{
+       int index = cl_page->cp_kmem_index;
+
+       if (index >= 0) {
+               LASSERT(index < ARRAY_SIZE(cl_page_kmem_array));
+               LASSERT(cl_page_kmem_size_array[index] == bufsize);
+               OBD_SLAB_FREE(cl_page, cl_page_kmem_array[index], bufsize);
+       } else {
+               OBD_FREE(cl_page, bufsize);
+       }
+}
+
 static void cl_page_free(const struct lu_env *env, struct cl_page *page,
                         struct pagevec *pvec)
 {
        struct cl_object *obj  = page->cp_obj;
-       int pagesize = cl_object_header(obj)->coh_page_bufsize;
+       unsigned short bufsize = cl_object_header(obj)->coh_page_bufsize;
 
        PASSERT(env, page, list_empty(&page->cp_batch));
        PASSERT(env, page, page->cp_owner == NULL);
@@ -167,7 +181,7 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page,
        lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
        cl_object_put(env, obj);
        lu_ref_fini(&page->cp_reference);
-       OBD_FREE(page, pagesize);
+       __cl_page_free(page, bufsize);
        EXIT;
 }
 
@@ -182,6 +196,59 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
         *(enum cl_page_state *)&page->cp_state = state;
 }
 
+static struct cl_page *__cl_page_alloc(struct cl_object *o)
+{
+       int i = 0;
+       struct cl_page *cl_page = NULL;
+       unsigned short bufsize = cl_object_header(o)->coh_page_bufsize;
+
+check:
+       /* the number of entries in cl_page_kmem_array is expected to
+        * only be 2-3 entries, so the lookup overhead should be low.
+        */
+       for ( ; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
+               if (smp_load_acquire(&cl_page_kmem_size_array[i])
+                   == bufsize) {
+                       OBD_SLAB_ALLOC_GFP(cl_page, cl_page_kmem_array[i],
+                                          bufsize, GFP_NOFS);
+                       if (cl_page)
+                               cl_page->cp_kmem_index = i;
+                       return cl_page;
+               }
+               if (cl_page_kmem_size_array[i] == 0)
+                       break;
+       }
+
+       if (i < ARRAY_SIZE(cl_page_kmem_array)) {
+               char cache_name[32];
+
+               mutex_lock(&cl_page_kmem_mutex);
+               if (cl_page_kmem_size_array[i]) {
+                       mutex_unlock(&cl_page_kmem_mutex);
+                       goto check;
+               }
+               snprintf(cache_name, sizeof(cache_name),
+                        "cl_page_kmem-%u", bufsize);
+               cl_page_kmem_array[i] =
+                       kmem_cache_create(cache_name, bufsize,
+                                         0, 0, NULL);
+               if (cl_page_kmem_array[i] == NULL) {
+                       mutex_unlock(&cl_page_kmem_mutex);
+                       return NULL;
+               }
+               smp_store_release(&cl_page_kmem_size_array[i],
+                                 bufsize);
+               mutex_unlock(&cl_page_kmem_mutex);
+               goto check;
+       } else {
+               OBD_ALLOC_GFP(cl_page, bufsize, GFP_NOFS);
+               if (cl_page)
+                       cl_page->cp_kmem_index = -1;
+       }
+
+       return cl_page;
+}
+
 struct cl_page *cl_page_alloc(const struct lu_env *env,
                struct cl_object *o, pgoff_t ind, struct page *vmpage,
                enum cl_page_type type)
@@ -190,8 +257,8 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
        struct lu_object_header *head;
 
        ENTRY;
-       OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
-                       GFP_NOFS);
+
+       page = __cl_page_alloc(o);
        if (page != NULL) {
                int result = 0;
                atomic_set(&page->cp_ref, 1);
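
A note on the memory ordering in __cl_page_alloc() (our reading of the
patch, not part of the commit message): the smp_store_release() /
smp_load_acquire() pair is what lets the common-case lookup stay
lockless.

    /*
     * Writer (runs under cl_page_kmem_mutex):
     *     cl_page_kmem_array[i] = kmem_cache_create(...);               (1)
     *     smp_store_release(&cl_page_kmem_size_array[i], bufsize);      (2)
     *
     * Reader (no lock taken):
     *     if (smp_load_acquire(&cl_page_kmem_size_array[i]) == bufsize) (2)
     *             OBD_SLAB_ALLOC_GFP(cl_page, cl_page_kmem_array[i], ...); (1)
     *
     * A reader that observes the published size (2) is guaranteed to
     * also observe the initialized slab pointer (1), so the mutex is
     * only taken the first time a new cl_page size is seen.
     */

On the free side, the cp_kmem_index recorded at allocation time sends
the page back to the matching cache without rescanning the array, and
index -1 marks the kmalloc()/OBD_FREE() fallback.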