Whamcloud - gitweb
LU-15058 libcfs: introduce genradix support 90/45890/18
authorJames Simmons <jsimmons@infradead.org>
Wed, 2 Nov 2022 16:50:34 +0000 (12:50 -0400)
committerOleg Drokin <green@whamcloud.com>
Mon, 14 Nov 2022 08:24:46 +0000 (08:24 +0000)
For a long time it has been known that vmalloc allocation have
a performance penalty. So another solution was introduced using a
generic radix tree to manage page size allocations. This new
functionality has huge potential to give us large performance gains.
Also this new API has an advantage over vmalloc for the case of not
knowing ahead of time how much to allocate. This is the case of a
Netlink data stream that is sending a array of data. We can use this
for the case of needing to grow an array instead of guessing how big
the data is coming.

Test-Parameters: trivial
Change-Id: I2d7b78569defd651fbb75bfbd82e0d805aff436e
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/45890
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/linux/Makefile.am
libcfs/include/libcfs/linux/generic-radix-tree.h [new file with mode: 0644]
libcfs/libcfs/Makefile.in
libcfs/libcfs/linux/Makefile.am
libcfs/libcfs/linux/generic-radix-tree.c [new file with mode: 0644]

index 770db1a..525de78 100644 (file)
@@ -1489,6 +1489,20 @@ EXTRA_KCFLAGS="$tmp_flags"
 ]) # LIBCFS_HAVE_IOV_ITER_TYPE
 
 #
+# LIBCFS_GENRADIX
+#
+# Kernel 5.0 commit ba20ba2e3743bac786dff777954c11930256075e
+# implemented generic radix trees to handle very large memory
+# allocation that can be used instead of vmalloc which has
+# a performance penalty.
+#
+AC_DEFUN([LIBCFS_GENRADIX], [
+LB_CHECK_EXPORT([__genradix_ptr], [lib/generic-radix-tree.c],
+       [AC_DEFINE(HAVE_GENRADIX_SUPPORT, 1,
+               [generic-radix-tree is present])])
+]) # LIBCFS_GENRADIX
+
+#
 # LIBCFS_GET_REQUEST_KEY_AUTH
 #
 # kernel 5.0 commit 822ad64d7e46a8e2c8b8a796738d7b657cbb146d
@@ -2056,6 +2070,7 @@ LIBCFS_NL_DUMP_EXT_ACK
 # 4.20
 LIBCFS_HAVE_IOV_ITER_TYPE
 # 5.0
+LIBCFS_GENRADIX
 LIBCFS_MM_TOTALRAM_PAGES_FUNC
 LIBCFS_GET_REQUEST_KEY_AUTH
 # 5.3
index d3fedf6..c8f3966 100644 (file)
@@ -1,3 +1,3 @@
 EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \
             linux-list.h linux-hash.h linux-uuid.h linux-wait.h linux-net.h \
-            glob.h refcount.h processor.h xarray.h
+            generic-radix-tree.h glob.h refcount.h processor.h xarray.h
diff --git a/libcfs/include/libcfs/linux/generic-radix-tree.h b/libcfs/include/libcfs/linux/generic-radix-tree.h
new file mode 100644 (file)
index 0000000..c61bb59
--- /dev/null
@@ -0,0 +1,235 @@
+#ifndef _LINUX_GENERIC_RADIX_TREE_H
+#define _LINUX_GENERIC_RADIX_TREE_H
+
+/**
+ * DOC: Generic radix trees/sparse arrays
+ *
+ * Very simple and minimalistic, supporting arbitrary size entries up to
+ * PAGE_SIZE.
+ *
+ * A genradix is defined with the type it will store, like so:
+ *
+ * static GENRADIX(struct foo) foo_genradix;
+ *
+ * The main operations are:
+ *
+ * - genradix_init(radix) - initialize an empty genradix
+ *
+ * - genradix_free(radix) - free all memory owned by the genradix and
+ *   reinitialize it
+ *
+ * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
+ *   NULL if that entry does not exist
+ *
+ * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
+ *   allocating it if necessary
+ *
+ * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
+ *
+ * The radix tree allocates one page of entries at a time, so entries may exist
+ * that were never explicitly allocated - they will be initialized to all
+ * zeroes.
+ *
+ * Internally, a genradix is just a radix tree of pages, and indexing works in
+ * terms of byte offsets. The wrappers in this header file use sizeof on the
+ * type the radix contains to calculate a byte offset from the index - see
+ * __idx_to_offset.
+ */
+/*
+ * Taken from Linux kernel 5.15
+ */
+#ifndef HAVE_GENRADIX_SUPPORT
+#include <asm/page.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+struct genradix_root;
+
+struct __genradix {
+       struct genradix_root            *root;
+};
+
+/*
+ * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ */
+
+#define __GENRADIX_INITIALIZER                                 \
+       {                                                       \
+               .tree = {                                       \
+                       .root = NULL,                           \
+               }                                               \
+       }
+
+/*
+ * We use a 0 size array to stash the type we're storing without taking any
+ * space at runtime - then the various accessor macros can use typeof() to get
+ * to it for casts/sizeof - we also force the alignment so that storing a type
+ * with a ridiculous alignment doesn't blow up the alignment or size of the
+ * genradix.
+ */
+
+#define GENRADIX(_type)                                                \
+struct {                                                       \
+       struct __genradix       tree;                           \
+       _type                   type[0] __aligned(1);           \
+}
+
+#define DEFINE_GENRADIX(_name, _type)                          \
+       GENRADIX(_type) _name = __GENRADIX_INITIALIZER
+
+/**
+ * genradix_init - initialize a genradix
+ * @_radix:    genradix to initialize
+ *
+ * Does not fail
+ */
+#define genradix_init(_radix)                                  \
+do {                                                           \
+       *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER;   \
+} while (0)
+
+void __genradix_free(struct __genradix *);
+
+/**
+ * genradix_free: free all memory owned by a genradix
+ * @_radix: the genradix to free
+ *
+ * After freeing, @_radix will be reinitialized and empty
+ */
+#define genradix_free(_radix)  __genradix_free(&(_radix)->tree)
+
+static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+{
+       if (__builtin_constant_p(obj_size))
+               BUILD_BUG_ON(obj_size > PAGE_SIZE);
+       else
+               BUG_ON(obj_size > PAGE_SIZE);
+
+       if (!is_power_of_2(obj_size)) {
+               size_t objs_per_page = PAGE_SIZE / obj_size;
+
+               return (idx / objs_per_page) * PAGE_SIZE +
+                       (idx % objs_per_page) * obj_size;
+       } else {
+               return idx * obj_size;
+       }
+}
+
+#define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
+#define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
+#define __genradix_idx_to_offset(_radix, _idx)                 \
+       __idx_to_offset(_idx, __genradix_obj_size(_radix))
+
+void *__genradix_ptr(struct __genradix *, size_t);
+
+/**
+ * genradix_ptr - get a pointer to a genradix entry
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ *
+ * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
+ */
+#define genradix_ptr(_radix, _idx)                             \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr(&(_radix)->tree,                        \
+                       __genradix_idx_to_offset(_radix, _idx)))
+
+void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
+ *                     if necessary
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ * @_gfp:      gfp mask
+ *
+ * Returns a pointer to entry at @_idx, or NULL on allocation failure
+ */
+#define genradix_ptr_alloc(_radix, _idx, _gfp)                 \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr_alloc(&(_radix)->tree,                  \
+                       __genradix_idx_to_offset(_radix, _idx), \
+                       _gfp))
+
+struct genradix_iter {
+       size_t                  offset;
+       size_t                  pos;
+};
+
+/**
+ * genradix_iter_init - initialize a genradix_iter
+ * @_radix:    genradix that will be iterated over
+ * @_idx:      index to start iterating from
+ */
+#define genradix_iter_init(_radix, _idx)                       \
+       ((struct genradix_iter) {                               \
+               .pos    = (_idx),                               \
+               .offset = __genradix_idx_to_offset((_radix), (_idx)),\
+       })
+
+void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or above iterator's current
+ *                     position
+ * @_iter:     a genradix_iter
+ * @_radix:    genradix being iterated over
+ *
+ * If no more entries exist at or above @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek(_iter, _radix)                      \
+       (__genradix_cast(_radix)                                \
+        __genradix_iter_peek(_iter, &(_radix)->tree,           \
+                             PAGE_SIZE / __genradix_obj_size(_radix)))
+
+static inline void __genradix_iter_advance(struct genradix_iter *iter,
+                                          size_t obj_size)
+{
+       iter->offset += obj_size;
+
+       if (!is_power_of_2(obj_size) &&
+           (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
+               iter->offset = round_up(iter->offset, PAGE_SIZE);
+
+       iter->pos++;
+}
+
+#define genradix_iter_advance(_iter, _radix)                   \
+       __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+
+#define genradix_for_each_from(_radix, _iter, _p, _start)      \
+       for (_iter = genradix_iter_init(_radix, _start);        \
+            (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
+            genradix_iter_advance(&_iter, _radix))
+
+/**
+ * genradix_for_each - iterate over entry in a genradix
+ * @_radix:    genradix to iterate over
+ * @_iter:     a genradix_iter to track current position
+ * @_p:                pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each(_radix, _iter, _p)                   \
+       genradix_for_each_from(_radix, _iter, _p, 0)
+
+int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_prealloc - preallocate entries in a generic radix tree
+ * @_radix:    genradix to preallocate
+ * @_nr:       number of entries to preallocate
+ * @_gfp:      gfp mask
+ *
+ * Returns 0 on success, -ENOMEM on failure
+ */
+#define genradix_prealloc(_radix, _nr, _gfp)                   \
+        __genradix_prealloc(&(_radix)->tree,                   \
+                       __genradix_idx_to_offset(_radix, _nr + 1),\
+                       _gfp)
+
+
+#endif /* _LINUX_GENERIC_RADIX_TREE_H */
+#endif /* ! HAVE_GENRADIX_SUPPORT */
index 20b5a27..b94628e 100644 (file)
@@ -3,6 +3,7 @@ MODULES = libcfs
 libcfs-linux-objs := linux-prim.o
 libcfs-linux-objs += linux-hash.o
 libcfs-linux-objs += linux-wait.o
+libcfs-linux-objs += generic-radix-tree.o
 libcfs-linux-objs += glob.o
 libcfs-linux-objs += xarray.o
 
index 959e1ff..0a5c870 100644 (file)
@@ -1,5 +1,6 @@
 EXTRA_DIST = linux-prim.c \
             linux-hash.c \
             linux-wait.c \
+            generic-radix-tree.c \
             glob.c \
             xarray.c
diff --git a/libcfs/libcfs/linux/generic-radix-tree.c b/libcfs/libcfs/linux/generic-radix-tree.c
new file mode 100644 (file)
index 0000000..ac96895
--- /dev/null
@@ -0,0 +1,242 @@
+#ifndef HAVE_GENRADIX_SUPPORT
+
+/* Taken from 5.15 kernel */
+
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/kmemleak.h>
+
+#define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
+
+struct genradix_node {
+       union {
+               /* Interior node: */
+               struct genradix_node    *children[GENRADIX_ARY];
+
+               /* Leaf: */
+               u8                      data[PAGE_SIZE];
+       };
+};
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+       return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+       return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH     \
+       DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK                            \
+       ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+       return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+       return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, or NULL if not
+ * allocated
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+       struct genradix_root *r = READ_ONCE(radix->root);
+       struct genradix_node *n = genradix_root_to_node(r);
+       unsigned level          = genradix_root_to_depth(r);
+
+       if (ilog2(offset) >= genradix_depth_shift(level))
+               return NULL;
+
+       while (1) {
+               if (!n)
+                       return NULL;
+               if (!level)
+                       break;
+
+               level--;
+
+               n = n->children[offset >> genradix_depth_shift(level)];
+               offset &= genradix_depth_size(level) - 1;
+       }
+
+       return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+       struct genradix_node *node;
+
+       node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+       /*
+        * We're using pages (not slab allocations) directly for kernel data
+        * structures, so we need to explicitly inform kmemleak of them in order
+        * to avoid false positive memory leak reports.
+        */
+       kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+       return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+       kmemleak_free(node);
+       free_page((unsigned long)node);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+                          gfp_t gfp_mask)
+{
+       struct genradix_root *v = READ_ONCE(radix->root);
+       struct genradix_node *n, *new_node = NULL;
+       unsigned level;
+
+       /* Increase tree depth if necessary: */
+       while (1) {
+               struct genradix_root *r = v, *new_root;
+
+               n       = genradix_root_to_node(r);
+               level   = genradix_root_to_depth(r);
+
+               if (n && ilog2(offset) < genradix_depth_shift(level))
+                       break;
+
+               if (!new_node) {
+                       new_node = genradix_alloc_node(gfp_mask);
+                       if (!new_node)
+                               return NULL;
+               }
+
+               new_node->children[0] = n;
+               new_root = ((struct genradix_root *)
+                           ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+               if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+                       v = new_root;
+                       new_node = NULL;
+               }
+       }
+
+       while (level--) {
+               struct genradix_node **p =
+                       &n->children[offset >> genradix_depth_shift(level)];
+               offset &= genradix_depth_size(level) - 1;
+
+               n = READ_ONCE(*p);
+               if (!n) {
+                       if (!new_node) {
+                               new_node = genradix_alloc_node(gfp_mask);
+                               if (!new_node)
+                                       return NULL;
+                       }
+
+                       if (!(n = cmpxchg_release(p, NULL, new_node)))
+                               swap(n, new_node);
+               }
+       }
+
+       if (new_node)
+               genradix_free_node(new_node);
+
+       return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
+void *__genradix_iter_peek(struct genradix_iter *iter,
+                          struct __genradix *radix,
+                          size_t objs_per_page)
+{
+       struct genradix_root *r;
+       struct genradix_node *n;
+       unsigned level, i;
+restart:
+       r = READ_ONCE(radix->root);
+       if (!r)
+               return NULL;
+
+       n       = genradix_root_to_node(r);
+       level   = genradix_root_to_depth(r);
+
+       if (ilog2(iter->offset) >= genradix_depth_shift(level))
+               return NULL;
+
+       while (level) {
+               level--;
+
+               i = (iter->offset >> genradix_depth_shift(level)) &
+                       (GENRADIX_ARY - 1);
+
+               while (!n->children[i]) {
+                       i++;
+                       iter->offset = round_down(iter->offset +
+                                          genradix_depth_size(level),
+                                          genradix_depth_size(level));
+                       iter->pos = (iter->offset >> PAGE_SHIFT) *
+                               objs_per_page;
+                       if (i == GENRADIX_ARY)
+                               goto restart;
+               }
+
+               n = n->children[i];
+       }
+
+       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+       if (level) {
+               unsigned i;
+
+               for (i = 0; i < GENRADIX_ARY; i++)
+                       if (n->children[i])
+                               genradix_free_recurse(n->children[i], level - 1);
+       }
+
+       genradix_free_node(n);
+}
+
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+                       gfp_t gfp_mask)
+{
+       size_t offset;
+
+       for (offset = 0; offset < size; offset += PAGE_SIZE)
+               if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+                       return -ENOMEM;
+
+       return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+void __genradix_free(struct __genradix *radix)
+{
+       struct genradix_root *r = xchg(&radix->root, NULL);
+
+       genradix_free_recurse(genradix_root_to_node(r),
+                             genradix_root_to_depth(r));
+}
+EXPORT_SYMBOL(__genradix_free);
+#endif /* !HAVE_GENRADIX_SUPPORT */