From 84e12028be9a13a5f702ce04adfd55d94d8a96ef Mon Sep 17 00:00:00 2001 From: James Simmons Date: Wed, 29 Apr 2020 08:37:32 -0400 Subject: [PATCH] LU-9859 libcfs: add support for Xarray In the late 4.X kernel cycle Xarrays were introduced with the goal of replacing the radix tree for the page cache. It is highly optimized for densely packed data which is the case for several items in Lustre such as the static array for the obd devices and quota ids. This patch provides Xarray support for kernels that lack. The current verison of Xarray back ported is from the 5.4-rc2 kernel. Change-Id: I54f9046f50a353e1cd4271c0b97207062bbf3898 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/37391 Tested-by: jenkins Reviewed-by: Shaun Tancheff Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: Oleg Drokin --- libcfs/autoconf/lustre-libcfs.m4 | 24 + libcfs/include/libcfs/linux/Makefile.am | 2 +- libcfs/include/libcfs/linux/xarray.h | 1757 ++++++++++++++++++++++++++ libcfs/libcfs/Makefile.in | 1 + libcfs/libcfs/linux/Makefile.am | 3 +- libcfs/libcfs/linux/linux-prim.c | 4 +- libcfs/libcfs/linux/xarray.c | 2101 +++++++++++++++++++++++++++++++ lustre/autoconf/lustre-core.m4 | 31 +- lustre/include/lustre_compat.h | 21 +- lustre/include/obd.h | 1 + lustre/llite/llite_lib.c | 4 +- lustre/llite/vvp_io.c | 8 +- lustre/mdc/mdc_request.c | 13 +- 13 files changed, 3923 insertions(+), 47 deletions(-) create mode 100644 libcfs/include/libcfs/linux/xarray.h create mode 100644 libcfs/libcfs/linux/xarray.c diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index 50c1431..ea62bf7 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -1069,6 +1069,28 @@ clear_and_wake_up_bit, [ ]) # LIBCFS_CLEAR_AND_WAKE_UP_BIT # +# LIBCFS_XARRAY_SUPPORT +# +# 4.19-rc5 kernel commit 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 +# replaced the radix tree implementation with Xarrays. This change +# introduced functionaly needed for general Xarray support +# +AC_DEFUN([LIBCFS_XARRAY_SUPPORT], [ +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" +LB_CHECK_COMPILE([if page cache uses Xarray], +xarray_support, [ + #include +],[ + radix_tree_exceptional_entry(NULL); +],[ + AC_DEFINE(HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY, 1, + [kernel lacks 'xa_is_value']) +]) +EXTRA_KCFLAGS="$tmp_flags" +]) # LIBCFS_XARRAY_SUPPORT + +# # LIBCFS_HAVE_IOV_ITER_TYPE # # kernel 4.20 commit 00e23707442a75b404392cef1405ab4fd498de6b @@ -1290,6 +1312,8 @@ LIBCFS_TIMER_SETUP LIBCFS_WAIT_VAR_EVENT # 4.17 LIBCFS_CLEAR_AND_WAKE_UP_BIT +# 4.19 +LIBCFS_XARRAY_SUPPORT # 4.20 LIBCFS_HAVE_IOV_ITER_TYPE # 5.0 diff --git a/libcfs/include/libcfs/linux/Makefile.am b/libcfs/include/libcfs/linux/Makefile.am index 187d360..389cd48 100644 --- a/libcfs/include/libcfs/linux/Makefile.am +++ b/libcfs/include/libcfs/linux/Makefile.am @@ -1,3 +1,3 @@ EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \ linux-list.h linux-hash.h linux-uuid.h linux-wait.h \ - refcount.h processor.h + refcount.h processor.h xarray.h diff --git a/libcfs/include/libcfs/linux/xarray.h b/libcfs/include/libcfs/linux/xarray.h new file mode 100644 index 0000000..735fc15 --- /dev/null +++ b/libcfs/include/libcfs/linux/xarray.h @@ -0,0 +1,1757 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _LINUX_XARRAY_H +#define _LINUX_XARRAY_H +/* + * eXtensible Arrays + * Copyright (c) 2017 Microsoft Corporation + * Author: Matthew Wilcox + * + * This is taken from kernel commit: + * + * 7b785645e ("mm: fix page cache convergence regression") + * + * at kernel verison 5.2-rc2 + * + * See Documentation/core-api/xarray.rst for how to use the XArray. + */ +#ifdef HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The bottom two bits of the entry determine how the XArray interprets + * the contents: + * + * 00: Pointer entry + * 10: Internal entry + * x1: Value entry or tagged pointer + * + * Attempting to store internal entries in the XArray is a bug. + * + * Most internal entries are pointers to the next node in the tree. + * The following internal entries have a special meaning: + * + * 0-62: Sibling entries + * 256: Zero entry + * 257: Retry entry + * + * Errors are also represented as internal entries, but use the negative + * space (-4094 to -2). They're never stored in the slots array; only + * returned by the normal API. + */ + +#define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) + +/** + * xa_mk_value() - Create an XArray entry from an integer. + * @v: Value to store in XArray. + * + * Context: Any context. + * Return: An entry suitable for storing in the XArray. + */ +static inline void *xa_mk_value(unsigned long v) +{ + WARN_ON((long)v < 0); + return (void *)((v << 1) | 1); +} + +/** + * xa_to_value() - Get value stored in an XArray entry. + * @entry: XArray entry. + * + * Context: Any context. + * Return: The value stored in the XArray entry. + */ +static inline unsigned long xa_to_value(const void *entry) +{ + return (unsigned long)entry >> 1; +} + +/** + * xa_is_value() - Determine if an entry is a value. + * @entry: XArray entry. + * + * Context: Any context. + * Return: True if the entry is a value, false if it is a pointer. + */ +static inline bool xa_is_value(const void *entry) +{ + return (unsigned long)entry & 1; +} + +/** + * xa_tag_pointer() - Create an XArray entry for a tagged pointer. + * @p: Plain pointer. + * @tag: Tag value (0, 1 or 3). + * + * If the user of the XArray prefers, they can tag their pointers instead + * of storing value entries. Three tags are available (0, 1 and 3). + * These are distinct from the xa_mark_t as they are not replicated up + * through the array and cannot be searched for. + * + * Context: Any context. + * Return: An XArray entry. + */ +static inline void *xa_tag_pointer(void *p, unsigned long tag) +{ + return (void *)((unsigned long)p | tag); +} + +/** + * xa_untag_pointer() - Turn an XArray entry into a plain pointer. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the untagged version of the pointer. + * + * Context: Any context. + * Return: A pointer. + */ +static inline void *xa_untag_pointer(void *entry) +{ + return (void *)((unsigned long)entry & ~3UL); +} + +/** + * xa_pointer_tag() - Get the tag stored in an XArray entry. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the tag of that pointer. + * + * Context: Any context. + * Return: A tag. + */ +static inline unsigned int xa_pointer_tag(void *entry) +{ + return (unsigned long)entry & 3UL; +} + +/* + * xa_mk_internal() - Create an internal entry. + * @v: Value to turn into an internal entry. + * + * Internal entries are used for a number of purposes. Entries 0-255 are + * used for sibling entries (only 0-62 are used by the current code). 256 + * is used for the retry entry. 257 is used for the reserved / zero entry. + * Negative internal entries are used to represent errnos. Node pointers + * are also tagged as internal entries in some situations. + * + * Context: Any context. + * Return: An XArray internal entry corresponding to this value. + */ +static inline void *xa_mk_internal(unsigned long v) +{ + return (void *)((v << 2) | 2); +} + +/* + * xa_to_internal() - Extract the value from an internal entry. + * @entry: XArray entry. + * + * Context: Any context. + * Return: The value which was stored in the internal entry. + */ +static inline unsigned long xa_to_internal(const void *entry) +{ + return (unsigned long)entry >> 2; +} + +/* + * xa_is_internal() - Is the entry an internal entry? + * @entry: XArray entry. + * + * Context: Any context. + * Return: %true if the entry is an internal entry. + */ +static inline bool xa_is_internal(const void *entry) +{ + return ((unsigned long)entry & 3) == 2; +} + +#define XA_ZERO_ENTRY xa_mk_internal(257) + +/** + * xa_is_zero() - Is the entry a zero entry? + * @entry: Entry retrieved from the XArray + * + * The normal API will return NULL as the contents of a slot containing + * a zero entry. You can only see zero entries by using the advanced API. + * + * Return: %true if the entry is a zero entry. + */ +static inline bool xa_is_zero(const void *entry) +{ + return unlikely(entry == XA_ZERO_ENTRY); +} + +/** + * xa_is_err() - Report whether an XArray operation returned an error + * @entry: Result from calling an XArray function + * + * If an XArray operation cannot complete an operation, it will return + * a special value indicating an error. This function tells you + * whether an error occurred; xa_err() tells you which error occurred. + * + * Context: Any context. + * Return: %true if the entry indicates an error. + */ +static inline bool xa_is_err(const void *entry) +{ + return unlikely(xa_is_internal(entry) && + entry >= xa_mk_internal(-MAX_ERRNO)); +} + +/** + * xa_err() - Turn an XArray result into an errno. + * @entry: Result from calling an XArray function. + * + * If an XArray operation cannot complete an operation, it will return + * a special pointer value which encodes an errno. This function extracts + * the errno from the pointer value, or returns 0 if the pointer does not + * represent an errno. + * + * Context: Any context. + * Return: A negative errno or 0. + */ +static inline int xa_err(void *entry) +{ + /* xa_to_internal() would not do sign extension. */ + if (xa_is_err(entry)) + return (long)entry >> 2; + return 0; +} + +/** + * struct xa_limit - Represents a range of IDs. + * @min: The lowest ID to allocate (inclusive). + * @max: The maximum ID to allocate (inclusive). + * + * This structure is used either directly or via the XA_LIMIT() macro + * to communicate the range of IDs that are valid for allocation. + * Two common ranges are predefined for you: + * * xa_limit_32b - [0 - UINT_MAX] + * * xa_limit_31b - [0 - INT_MAX] + */ +struct xa_limit { + u32 max; + u32 min; +}; + +#define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max } + +#define xa_limit_32b XA_LIMIT(0, UINT_MAX) +#define xa_limit_31b XA_LIMIT(0, INT_MAX) + +typedef unsigned __bitwise xa_mark_t; +#define XA_MARK_0 ((__force xa_mark_t)0U) +#define XA_MARK_1 ((__force xa_mark_t)1U) +#define XA_MARK_2 ((__force xa_mark_t)2U) +#define XA_PRESENT ((__force xa_mark_t)8U) +#define XA_MARK_MAX XA_MARK_2 +#define XA_FREE_MARK XA_MARK_0 + +enum xa_lock_type { + XA_LOCK_IRQ = 1, + XA_LOCK_BH = 2, +}; + +/* + * Values for xa_flags. The radix tree stores its GFP flags in the xa_flags, + * and we remain compatible with that. + */ +#define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) +#define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) +#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) +#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) +#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) +#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) +#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ + (__force unsigned)(mark))) + +/* ALLOC is for a normal 0-based alloc. ALLOC1 is for an 1-based alloc */ +#define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) +#define XA_FLAGS_ALLOC1 (XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY) + +/** + * struct xarray - The anchor of the XArray. + * @xa_lock: Lock that protects the contents of the XArray. + * + * To use the xarray, define it statically or embed it in your data structure. + * It is a very small data structure, so it does not usually make sense to + * allocate it separately and keep a pointer to it in your data structure. + * + * You may use the xa_lock to protect your own data structures as well. + */ +/* + * If all of the entries in the array are NULL, @xa_head is a NULL pointer. + * If the only non-NULL entry in the array is at index 0, @xa_head is that + * entry. If any other entry in the array is non-NULL, @xa_head points + * to an @xa_node. + */ +struct xarray { + spinlock_t xa_lock; +/* private: The rest of the data structure is not to be used directly. */ + gfp_t xa_flags; + void __rcu *xa_head; +}; + +#define XARRAY_INIT(name, flags) { \ + .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ + .xa_flags = flags, \ + .xa_head = NULL, \ +} + +/** + * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags. + * @name: A string that names your XArray. + * @flags: XA_FLAG values. + * + * This is intended for file scope definitions of XArrays. It declares + * and initialises an empty XArray with the chosen name and flags. It is + * equivalent to calling xa_init_flags() on the array, but it does the + * initialisation at compiletime instead of runtime. + */ +#define DEFINE_XARRAY_FLAGS(name, flags) \ + struct xarray name = XARRAY_INIT(name, flags) + +/** + * DEFINE_XARRAY() - Define an XArray. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of XArrays. It declares + * and initialises an empty XArray with the chosen name. It is equivalent + * to calling xa_init() on the array, but it does the initialisation at + * compiletime instead of runtime. + */ +#define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) + +/** + * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of allocating XArrays. + * See also DEFINE_XARRAY(). + */ +#define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) + +/** + * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of allocating XArrays. + * See also DEFINE_XARRAY(). + */ +#define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1) + +void *xa_load(struct xarray *, unsigned long index); +void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); +void *xa_erase(struct xarray *, unsigned long index); +void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, + void *entry, gfp_t); +bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); +void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); +void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); +void *xa_find(struct xarray *xa, unsigned long *index, + unsigned long max, xa_mark_t) __attribute__((nonnull(2))); +void *xa_find_after(struct xarray *xa, unsigned long *index, + unsigned long max, xa_mark_t) __attribute__((nonnull(2))); +unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, + unsigned long max, unsigned int n, xa_mark_t); +void xa_destroy(struct xarray *); + +/** + * xa_init_flags() - Initialise an empty XArray with flags. + * @xa: XArray. + * @flags: XA_FLAG values. + * + * If you need to initialise an XArray with special flags (eg you need + * to take the lock from interrupt context), use this function instead + * of xa_init(). + * + * Context: Any context. + */ +static inline void xa_init_flags(struct xarray *xa, gfp_t flags) +{ + spin_lock_init(&xa->xa_lock); + xa->xa_flags = flags; + xa->xa_head = NULL; +} + +/** + * xa_init() - Initialise an empty XArray. + * @xa: XArray. + * + * An empty XArray is full of NULL entries. + * + * Context: Any context. + */ +static inline void xa_init(struct xarray *xa) +{ + xa_init_flags(xa, 0); +} + +/** + * xa_empty() - Determine if an array has any present entries. + * @xa: XArray. + * + * Context: Any context. + * Return: %true if the array contains only NULL pointers. + */ +static inline bool xa_empty(const struct xarray *xa) +{ + return xa->xa_head == NULL; +} + +/** + * xa_marked() - Inquire whether any entry in this array has a mark set + * @xa: Array + * @mark: Mark value + * + * Context: Any context. + * Return: %true if any entry has this mark set. + */ +static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) +{ + return xa->xa_flags & XA_FLAGS_MARK(mark); +} + +/** + * xa_for_each_start() - Iterate over a portion of an XArray. + * @xa: XArray. + * @index: Index of @entry. + * @entry: Entry retrieved from array. + * @start: First index to retrieve from array. + * + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. You may modify @index during the iteration if you + * want to skip or reprocess indices. It is safe to modify the array + * during the iteration. At the end of the iteration, @entry will be set + * to NULL and @index will have a value less than or equal to max. + * + * xa_for_each_start() is O(n.log(n)) while xas_for_each() is O(n). You have + * to handle your own locking with xas_for_each(), and if you have to unlock + * after each iteration, it will also end up being O(n.log(n)). + * xa_for_each_start() will spin if it hits a retry entry; if you intend to + * see retry entries, you should use the xas_for_each() iterator instead. + * The xas_for_each() iterator will expand into more inline code than + * xa_for_each_start(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each_start(xa, index, entry, start) \ + for (index = start, \ + entry = xa_find(xa, &index, ULONG_MAX, XA_PRESENT); \ + entry; \ + entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)) + +/** + * xa_for_each() - Iterate over present entries in an XArray. + * @xa: XArray. + * @index: Index of @entry. + * @entry: Entry retrieved from array. + * + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. You may modify @index during the iteration if you want + * to skip or reprocess indices. It is safe to modify the array during the + * iteration. At the end of the iteration, @entry will be set to NULL and + * @index will have a value less than or equal to max. + * + * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n). You have + * to handle your own locking with xas_for_each(), and if you have to unlock + * after each iteration, it will also end up being O(n.log(n)). xa_for_each() + * will spin if it hits a retry entry; if you intend to see retry entries, + * you should use the xas_for_each() iterator instead. The xas_for_each() + * iterator will expand into more inline code than xa_for_each(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each(xa, index, entry) \ + xa_for_each_start(xa, index, entry, 0) + +/** + * xa_for_each_marked() - Iterate over marked entries in an XArray. + * @xa: XArray. + * @index: Index of @entry. + * @entry: Entry retrieved from array. + * @filter: Selection criterion. + * + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. The iteration will skip all entries in the array + * which do not match @filter. You may modify @index during the iteration + * if you want to skip or reprocess indices. It is safe to modify the array + * during the iteration. At the end of the iteration, @entry will be set to + * NULL and @index will have a value less than or equal to max. + * + * xa_for_each_marked() is O(n.log(n)) while xas_for_each_marked() is O(n). + * You have to handle your own locking with xas_for_each(), and if you have + * to unlock after each iteration, it will also end up being O(n.log(n)). + * xa_for_each_marked() will spin if it hits a retry entry; if you intend to + * see retry entries, you should use the xas_for_each_marked() iterator + * instead. The xas_for_each_marked() iterator will expand into more inline + * code than xa_for_each_marked(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each_marked(xa, index, entry, filter) \ + for (index = 0, entry = xa_find(xa, &index, ULONG_MAX, filter); \ + entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) + +#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) +#define xa_lock(xa) spin_lock(&(xa)->xa_lock) +#define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) +#define xa_lock_bh(xa) spin_lock_bh(&(xa)->xa_lock) +#define xa_unlock_bh(xa) spin_unlock_bh(&(xa)->xa_lock) +#define xa_lock_irq(xa) spin_lock_irq(&(xa)->xa_lock) +#define xa_unlock_irq(xa) spin_unlock_irq(&(xa)->xa_lock) +#define xa_lock_irqsave(xa, flags) \ + spin_lock_irqsave(&(xa)->xa_lock, flags) +#define xa_unlock_irqrestore(xa, flags) \ + spin_unlock_irqrestore(&(xa)->xa_lock, flags) + +/* + * Versions of the normal API which require the caller to hold the + * xa_lock. If the GFP flags allow it, they will drop the lock to + * allocate memory, then reacquire it afterwards. These functions + * may also re-enable interrupts if the XArray flags indicate the + * locking should be interrupt safe. + */ +void *__xa_erase(struct xarray *, unsigned long index); +void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); +void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, + void *entry, gfp_t); +int __must_check __xa_insert(struct xarray *, unsigned long index, + void *entry, gfp_t); +int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, + struct xa_limit, gfp_t); +int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry, + struct xa_limit, u32 *next, gfp_t); +void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); +void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); + +/** + * xa_store_bh() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_store() except it disables softirqs + * while holding the array lock. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: The entry which used to be at this index. + */ +static inline void *xa_store_bh(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_bh(xa); + curr = __xa_store(xa, index, entry, gfp); + xa_unlock_bh(xa); + + return curr; +} + +/** + * xa_store_irq() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_store() except it disables interrupts + * while holding the array lock. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: The entry which used to be at this index. + */ +static inline void *xa_store_irq(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_irq(xa); + curr = __xa_store(xa, index, entry, gfp); + xa_unlock_irq(xa); + + return curr; +} + +/** + * xa_erase_bh() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. + * + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: The entry which used to be at this index. + */ +static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) +{ + void *entry; + + xa_lock_bh(xa); + entry = __xa_erase(xa, index); + xa_unlock_bh(xa); + + return entry; +} + +/** + * xa_erase_irq() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. + * + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: The entry which used to be at this index. + */ +static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) +{ + void *entry; + + xa_lock_irq(xa); + entry = __xa_erase(xa, index); + xa_unlock_irq(xa); + + return entry; +} + +/** + * xa_cmpxchg() - Conditionally replace an entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * If the entry at @index is the same as @old, replace it with @entry. + * If the return value is equal to @old, then the exchange was successful. + * + * Context: Any context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock(xa); + curr = __xa_cmpxchg(xa, index, old, entry, gfp); + xa_unlock(xa); + + return curr; +} + +/** + * xa_cmpxchg_bh() - Conditionally replace an entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_cmpxchg() except it disables softirqs + * while holding the array lock. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_bh(xa); + curr = __xa_cmpxchg(xa, index, old, entry, gfp); + xa_unlock_bh(xa); + + return curr; +} + +/** + * xa_cmpxchg_irq() - Conditionally replace an entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * This function is like calling xa_cmpxchg() except it disables interrupts + * while holding the array lock. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock_irq(xa); + curr = __xa_cmpxchg(xa, index, old, entry, gfp); + xa_unlock_irq(xa); + + return curr; +} + +/** + * xa_insert() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int __must_check xa_insert(struct xarray *xa, + unsigned long index, void *entry, gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_insert_bh() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int __must_check xa_insert_bh(struct xarray *xa, + unsigned long index, void *entry, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_insert_irq() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int __must_check xa_insert_irq(struct xarray *xa, + unsigned long index, void *entry, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock_irq(xa); + + return err; +} + +/** + * xa_alloc() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. + */ +static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_alloc(xa, id, entry, limit, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_alloc_bh() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. + */ +static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_alloc(xa, id, entry, limit, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_alloc_irq() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. + */ +static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, + void *entry, struct xa_limit limit, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_alloc(xa, id, entry, limit, gfp); + xa_unlock_irq(xa); + + return err; +} + +/** + * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); + xa_unlock_irq(xa); + + return err; +} + +/** + * xa_reserve() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * Ensures there is somewhere to store an entry at @index in the array. + * If there is already something stored at @index, this function does + * nothing. If there was nothing there, the entry is marked as reserved. + * Loading from a reserved entry returns a %NULL pointer. + * + * If you do not use the entry that you have reserved, call xa_release() + * or xa_erase() to free any unnecessary memory. + * + * Context: Any context. Takes and releases the xa_lock. + * May sleep if the @gfp flags permit. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline __must_check +int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + return xa_err(xa_cmpxchg(xa, index, NULL, XA_ZERO_ENTRY, gfp)); +} + +/** + * xa_reserve_bh() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * A softirq-disabling version of xa_reserve(). + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline __must_check +int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + return xa_err(xa_cmpxchg_bh(xa, index, NULL, XA_ZERO_ENTRY, gfp)); +} + +/** + * xa_reserve_irq() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * An interrupt-disabling version of xa_reserve(). + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +static inline __must_check +int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + return xa_err(xa_cmpxchg_irq(xa, index, NULL, XA_ZERO_ENTRY, gfp)); +} + +/** + * xa_release() - Release a reserved entry. + * @xa: XArray. + * @index: Index of entry. + * + * After calling xa_reserve(), you can call this function to release the + * reservation. If the entry at @index has been stored to, this function + * will do nothing. + */ +static inline void xa_release(struct xarray *xa, unsigned long index) +{ + xa_cmpxchg(xa, index, XA_ZERO_ENTRY, NULL, 0); +} + +/* Everything below here is the Advanced API. Proceed with caution. */ + +/* + * The xarray is constructed out of a set of 'chunks' of pointers. Choosing + * the best chunk size requires some tradeoffs. A power of two recommends + * itself so that we can walk the tree based purely on shifts and masks. + * Generally, the larger the better; as the number of slots per level of the + * tree increases, the less tall the tree needs to be. But that needs to be + * balanced against the memory consumption of each node. On a 64-bit system, + * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we + * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. + */ +#ifndef XA_CHUNK_SHIFT +#define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#endif +#define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) +#define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) +#define XA_MAX_MARKS 3 +#define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) + +/* + * @count is the count of every non-NULL element in the ->slots array + * whether that is a value entry, a retry entry, a user pointer, + * a sibling entry or a pointer to the next level of the tree. + * @nr_values is the count of every element in ->slots which is + * either a value entry or a sibling of a value entry. + */ +struct xa_node { + unsigned char shift; /* Bits remaining in each slot */ + unsigned char offset; /* Slot offset in parent */ + unsigned char count; /* Total entry count */ + unsigned char nr_values; /* Value entry count */ + struct xa_node __rcu *parent; /* NULL at top of tree */ + struct xarray *array; /* The array we belong to */ + union { + struct list_head private_list; /* For tree user */ + struct rcu_head rcu_head; /* Used when freeing node */ + }; + void __rcu *slots[XA_CHUNK_SIZE]; + union { + unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS]; + unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS]; + }; +}; + +void xa_dump(const struct xarray *); +void xa_dump_node(const struct xa_node *); + +#ifdef XA_DEBUG +#define XA_BUG_ON(xa, x) do { \ + if (x) { \ + xa_dump(xa); \ + BUG(); \ + } \ + } while (0) +#define XA_NODE_BUG_ON(node, x) do { \ + if (x) { \ + if (node) xa_dump_node(node); \ + BUG(); \ + } \ + } while (0) +#else +#define XA_BUG_ON(xa, x) do { } while (0) +#define XA_NODE_BUG_ON(node, x) do { } while (0) +#endif + +/* Private */ +static inline void *xa_head(const struct xarray *xa) +{ + return rcu_dereference_check(xa->xa_head, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_head_locked(const struct xarray *xa) +{ + return rcu_dereference_protected(xa->xa_head, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_entry(const struct xarray *xa, + const struct xa_node *node, unsigned int offset) +{ + XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); + return rcu_dereference_check(node->slots[offset], + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_entry_locked(const struct xarray *xa, + const struct xa_node *node, unsigned int offset) +{ + XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); + return rcu_dereference_protected(node->slots[offset], + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline struct xa_node *xa_parent(const struct xarray *xa, + const struct xa_node *node) +{ + return rcu_dereference_check(node->parent, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline struct xa_node *xa_parent_locked(const struct xarray *xa, + const struct xa_node *node) +{ + return rcu_dereference_protected(node->parent, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_mk_node(const struct xa_node *node) +{ + return (void *)((unsigned long)node | 2); +} + +/* Private */ +static inline struct xa_node *xa_to_node(const void *entry) +{ + return (struct xa_node *)((unsigned long)entry - 2); +} + +/* Private */ +static inline bool xa_is_node(const void *entry) +{ + return xa_is_internal(entry) && (unsigned long)entry > 4096; +} + +/* Private */ +static inline void *xa_mk_sibling(unsigned int offset) +{ + return xa_mk_internal(offset); +} + +/* Private */ +static inline unsigned long xa_to_sibling(const void *entry) +{ + return xa_to_internal(entry); +} + +/** + * xa_is_sibling() - Is the entry a sibling entry? + * @entry: Entry retrieved from the XArray + * + * Return: %true if the entry is a sibling entry. + */ +static inline bool xa_is_sibling(const void *entry) +{ + return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) && + (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); +} + +#define XA_RETRY_ENTRY xa_mk_internal(256) + +/** + * xa_is_retry() - Is the entry a retry entry? + * @entry: Entry retrieved from the XArray + * + * Return: %true if the entry is a retry entry. + */ +static inline bool xa_is_retry(const void *entry) +{ + return unlikely(entry == XA_RETRY_ENTRY); +} + +/** + * xa_is_advanced() - Is the entry only permitted for the advanced API? + * @entry: Entry to be stored in the XArray. + * + * Return: %true if the entry cannot be stored by the normal API. + */ +static inline bool xa_is_advanced(const void *entry) +{ + return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY); +} + +/** + * typedef xa_update_node_t - A callback function from the XArray. + * @node: The node which is being processed + * + * This function is called every time the XArray updates the count of + * present and value entries in a node. It allows advanced users to + * maintain the private_list in the node. + * + * Context: The xa_lock is held and interrupts may be disabled. + * Implementations should not drop the xa_lock, nor re-enable + * interrupts. + */ +typedef void (*xa_update_node_t)(struct xa_node *node); + +/* + * The xa_state is opaque to its users. It contains various different pieces + * of state involved in the current operation on the XArray. It should be + * declared on the stack and passed between the various internal routines. + * The various elements in it should not be accessed directly, but only + * through the provided accessor functions. The below documentation is for + * the benefit of those working on the code, not for users of the XArray. + * + * @xa_node usually points to the xa_node containing the slot we're operating + * on (and @xa_offset is the offset in the slots array). If there is a + * single entry in the array at index 0, there are no allocated xa_nodes to + * point to, and so we store %NULL in @xa_node. @xa_node is set to + * the value %XAS_RESTART if the xa_state is not walked to the correct + * position in the tree of nodes for this operation. If an error occurs + * during an operation, it is set to an %XAS_ERROR value. If we run off the + * end of the allocated nodes, it is set to %XAS_BOUNDS. + */ +struct xa_state { + struct xarray *xa; + unsigned long xa_index; + unsigned char xa_shift; + unsigned char xa_sibs; + unsigned char xa_offset; + unsigned char xa_pad; /* Helps gcc generate better code */ + struct xa_node *xa_node; + struct xa_node *xa_alloc; + xa_update_node_t xa_update; +}; + +/* + * We encode errnos in the xas->xa_node. If an error has happened, we need to + * drop the lock to fix it, and once we've done so the xa_state is invalid. + */ +#define XA_ERROR(errno) ((struct xa_node *)(((unsigned long)errno << 2) | 2UL)) +#define XAS_BOUNDS ((struct xa_node *)1UL) +#define XAS_RESTART ((struct xa_node *)3UL) + +#define __XA_STATE(array, index, shift, sibs) { \ + .xa = array, \ + .xa_index = index, \ + .xa_shift = shift, \ + .xa_sibs = sibs, \ + .xa_offset = 0, \ + .xa_pad = 0, \ + .xa_node = XAS_RESTART, \ + .xa_alloc = NULL, \ + .xa_update = NULL \ +} + +/** + * XA_STATE() - Declare an XArray operation state. + * @name: Name of this operation state (usually xas). + * @array: Array to operate on. + * @index: Initial index of interest. + * + * Declare and initialise an xa_state on the stack. + */ +#define XA_STATE(name, array, index) \ + struct xa_state name = __XA_STATE(array, index, 0, 0) + +/** + * XA_STATE_ORDER() - Declare an XArray operation state. + * @name: Name of this operation state (usually xas). + * @array: Array to operate on. + * @index: Initial index of interest. + * @order: Order of entry. + * + * Declare and initialise an xa_state on the stack. This variant of + * XA_STATE() allows you to specify the 'order' of the element you + * want to operate on.` + */ +#define XA_STATE_ORDER(name, array, index, order) \ + struct xa_state name = __XA_STATE(array, \ + (index >> order) << order, \ + order - (order % XA_CHUNK_SHIFT), \ + (1U << (order % XA_CHUNK_SHIFT)) - 1) + +#define xas_marked(xas, mark) xa_marked((xas)->xa, (mark)) +#define xas_trylock(xas) xa_trylock((xas)->xa) +#define xas_lock(xas) xa_lock((xas)->xa) +#define xas_unlock(xas) xa_unlock((xas)->xa) +#define xas_lock_bh(xas) xa_lock_bh((xas)->xa) +#define xas_unlock_bh(xas) xa_unlock_bh((xas)->xa) +#define xas_lock_irq(xas) xa_lock_irq((xas)->xa) +#define xas_unlock_irq(xas) xa_unlock_irq((xas)->xa) +#define xas_lock_irqsave(xas, flags) \ + xa_lock_irqsave((xas)->xa, flags) +#define xas_unlock_irqrestore(xas, flags) \ + xa_unlock_irqrestore((xas)->xa, flags) + +/** + * xas_error() - Return an errno stored in the xa_state. + * @xas: XArray operation state. + * + * Return: 0 if no error has been noted. A negative errno if one has. + */ +static inline int xas_error(const struct xa_state *xas) +{ + return xa_err(xas->xa_node); +} + +/** + * xas_set_err() - Note an error in the xa_state. + * @xas: XArray operation state. + * @err: Negative error number. + * + * Only call this function with a negative @err; zero or positive errors + * will probably not behave the way you think they should. If you want + * to clear the error from an xa_state, use xas_reset(). + */ +static inline void xas_set_err(struct xa_state *xas, long err) +{ + xas->xa_node = XA_ERROR(err); +} + +/** + * xas_invalid() - Is the xas in a retry or error state? + * @xas: XArray operation state. + * + * Return: %true if the xas cannot be used for operations. + */ +static inline bool xas_invalid(const struct xa_state *xas) +{ + return (unsigned long)xas->xa_node & 3; +} + +/** + * xas_valid() - Is the xas a valid cursor into the array? + * @xas: XArray operation state. + * + * Return: %true if the xas can be used for operations. + */ +static inline bool xas_valid(const struct xa_state *xas) +{ + return !xas_invalid(xas); +} + +/** + * xas_is_node() - Does the xas point to a node? + * @xas: XArray operation state. + * + * Return: %true if the xas currently references a node. + */ +static inline bool xas_is_node(const struct xa_state *xas) +{ + return xas_valid(xas) && xas->xa_node; +} + +/* True if the pointer is something other than a node */ +static inline bool xas_not_node(struct xa_node *node) +{ + return ((unsigned long)node & 3) || !node; +} + +/* True if the node represents RESTART or an error */ +static inline bool xas_frozen(struct xa_node *node) +{ + return (unsigned long)node & 2; +} + +/* True if the node represents head-of-tree, RESTART or BOUNDS */ +static inline bool xas_top(struct xa_node *node) +{ + return node <= XAS_RESTART; +} + +/** + * xas_reset() - Reset an XArray operation state. + * @xas: XArray operation state. + * + * Resets the error or walk state of the @xas so future walks of the + * array will start from the root. Use this if you have dropped the + * xarray lock and want to reuse the xa_state. + * + * Context: Any context. + */ +static inline void xas_reset(struct xa_state *xas) +{ + xas->xa_node = XAS_RESTART; +} + +/** + * xas_retry() - Retry the operation if appropriate. + * @xas: XArray operation state. + * @entry: Entry from xarray. + * + * The advanced functions may sometimes return an internal entry, such as + * a retry entry or a zero entry. This function sets up the @xas to restart + * the walk from the head of the array if needed. + * + * Context: Any context. + * Return: true if the operation needs to be retried. + */ +static inline bool xas_retry(struct xa_state *xas, const void *entry) +{ + if (xa_is_zero(entry)) + return true; + if (!xa_is_retry(entry)) + return false; + xas_reset(xas); + return true; +} + +void *xas_load(struct xa_state *); +void *xas_store(struct xa_state *, void *entry); +void *xas_find(struct xa_state *, unsigned long max); +void *xas_find_conflict(struct xa_state *); + +bool xas_get_mark(const struct xa_state *, xa_mark_t); +void xas_set_mark(const struct xa_state *, xa_mark_t); +void xas_clear_mark(const struct xa_state *, xa_mark_t); +void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); +void xas_init_marks(const struct xa_state *); + +bool xas_nomem(struct xa_state *, gfp_t); +void xas_pause(struct xa_state *); + +void xas_create_range(struct xa_state *); + +/** + * xas_reload() - Refetch an entry from the xarray. + * @xas: XArray operation state. + * + * Use this function to check that a previously loaded entry still has + * the same value. This is useful for the lockless pagecache lookup where + * we walk the array with only the RCU lock to protect us, lock the page, + * then check that the page hasn't moved since we looked it up. + * + * The caller guarantees that @xas is still valid. If it may be in an + * error or restart state, call xas_load() instead. + * + * Return: The entry at this location in the xarray. + */ +static inline void *xas_reload(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (node) + return xa_entry(xas->xa, node, xas->xa_offset); + return xa_head(xas->xa); +} + +/** + * xas_set() - Set up XArray operation state for a different index. + * @xas: XArray operation state. + * @index: New index into the XArray. + * + * Move the operation state to refer to a different index. This will + * have the effect of starting a walk from the top; see xas_next() + * to move to an adjacent index. + */ +static inline void xas_set(struct xa_state *xas, unsigned long index) +{ + xas->xa_index = index; + xas->xa_node = XAS_RESTART; +} + +/** + * xas_set_order() - Set up XArray operation state for a multislot entry. + * @xas: XArray operation state. + * @index: Target of the operation. + * @order: Entry occupies 2^@order indices. + */ +static inline void xas_set_order(struct xa_state *xas, unsigned long index, + unsigned int order) +{ +#ifdef CONFIG_XARRAY_MULTI + xas->xa_index = order < BITS_PER_LONG ? (index >> order) << order : 0; + xas->xa_shift = order - (order % XA_CHUNK_SHIFT); + xas->xa_sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + xas->xa_node = XAS_RESTART; +#else + BUG_ON(order > 0); + xas_set(xas, index); +#endif +} + +/** + * xas_set_update() - Set up XArray operation state for a callback. + * @xas: XArray operation state. + * @update: Function to call when updating a node. + * + * The XArray can notify a caller after it has updated an xa_node. + * This is advanced functionality and is only needed by the page cache. + */ +static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) +{ + xas->xa_update = update; +} + +/** + * xas_next_entry() - Advance iterator to next present entry. + * @xas: XArray operation state. + * @max: Highest index to return. + * + * xas_next_entry() is an inline function to optimise xarray traversal for + * speed. It is equivalent to calling xas_find(), and will call xas_find() + * for all the hard cases. + * + * Return: The next present entry after the one currently referred to by @xas. + */ +static inline void *xas_next_entry(struct xa_state *xas, unsigned long max) +{ + struct xa_node *node = xas->xa_node; + void *entry; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK))) + return xas_find(xas, max); + + do { + if (unlikely(xas->xa_index >= max)) + return xas_find(xas, max); + if (unlikely(xas->xa_offset == XA_CHUNK_MASK)) + return xas_find(xas, max); + entry = xa_entry(xas->xa, node, xas->xa_offset + 1); + if (unlikely(xa_is_internal(entry))) + return xas_find(xas, max); + xas->xa_offset++; + xas->xa_index++; + } while (!entry); + + return entry; +} + +/* Private */ +static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance, + xa_mark_t mark) +{ + unsigned long *addr = xas->xa_node->marks[(__force unsigned)mark]; + unsigned int offset = xas->xa_offset; + + if (advance) + offset++; + if (XA_CHUNK_SIZE == BITS_PER_LONG) { + if (offset < XA_CHUNK_SIZE) { + unsigned long data = *addr & (~0UL << offset); + if (data) + return __ffs(data); + } + return XA_CHUNK_SIZE; + } + + return find_next_bit(addr, XA_CHUNK_SIZE, offset); +} + +/** + * xas_next_marked() - Advance iterator to next marked entry. + * @xas: XArray operation state. + * @max: Highest index to return. + * @mark: Mark to search for. + * + * xas_next_marked() is an inline function to optimise xarray traversal for + * speed. It is equivalent to calling xas_find_marked(), and will call + * xas_find_marked() for all the hard cases. + * + * Return: The next marked entry after the one currently referred to by @xas. + */ +static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, + xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset; + + if (unlikely(xas_not_node(node) || node->shift)) + return xas_find_marked(xas, max, mark); + offset = xas_find_chunk(xas, true, mark); + xas->xa_offset = offset; + xas->xa_index = (xas->xa_index & ~XA_CHUNK_MASK) + offset; + if (xas->xa_index > max) + return NULL; + if (offset == XA_CHUNK_SIZE) + return xas_find_marked(xas, max, mark); + return xa_entry(xas->xa, node, offset); +} + +/* + * If iterating while holding a lock, drop the lock and reschedule + * every %XA_CHECK_SCHED loops. + */ +enum { + XA_CHECK_SCHED = 4096, +}; + +/** + * xas_for_each() - Iterate over a range of an XArray. + * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * @max: Maximum index to retrieve from array. + * + * The loop body will be executed for each entry present in the xarray + * between the current xas position and @max. @entry will be set to + * the entry retrieved from the xarray. It is safe to delete entries + * from the array in the loop body. You should hold either the RCU lock + * or the xa_lock while iterating. If you need to drop the lock, call + * xas_pause() first. + */ +#define xas_for_each(xas, entry, max) \ + for (entry = xas_find(xas, max); entry; \ + entry = xas_next_entry(xas, max)) + +/** + * xas_for_each_marked() - Iterate over a range of an XArray. + * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * @max: Maximum index to retrieve from array. + * @mark: Mark to search for. + * + * The loop body will be executed for each marked entry in the xarray + * between the current xas position and @max. @entry will be set to + * the entry retrieved from the xarray. It is safe to delete entries + * from the array in the loop body. You should hold either the RCU lock + * or the xa_lock while iterating. If you need to drop the lock, call + * xas_pause() first. + */ +#define xas_for_each_marked(xas, entry, max, mark) \ + for (entry = xas_find_marked(xas, max, mark); entry; \ + entry = xas_next_marked(xas, max, mark)) + +/** + * xas_for_each_conflict() - Iterate over a range of an XArray. + * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * + * The loop body will be executed for each entry in the XArray that lies + * within the range specified by @xas. If the loop completes successfully, + * any entries that lie in this range will be replaced by @entry. The caller + * may break out of the loop; if they do so, the contents of the XArray will + * be unchanged. The operation may fail due to an out of memory condition. + * The caller may also call xa_set_err() to exit the loop while setting an + * error to record the reason. + */ +#define xas_for_each_conflict(xas, entry) \ + while ((entry = xas_find_conflict(xas))) + +void *__xas_next(struct xa_state *); +void *__xas_prev(struct xa_state *); + +/** + * xas_prev() - Move iterator to previous index. + * @xas: XArray operation state. + * + * If the @xas was in an error state, it will remain in an error state + * and this function will return %NULL. If the @xas has never been walked, + * it will have the effect of calling xas_load(). Otherwise one will be + * subtracted from the index and the state will be walked to the correct + * location in the array for the next operation. + * + * If the iterator was referencing index 0, this function wraps + * around to %ULONG_MAX. + * + * Return: The entry at the new index. This may be %NULL or an internal + * entry. + */ +static inline void *xas_prev(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset == 0)) + return __xas_prev(xas); + + xas->xa_index--; + xas->xa_offset--; + return xa_entry(xas->xa, node, xas->xa_offset); +} + +/** + * xas_next() - Move state to next index. + * @xas: XArray operation state. + * + * If the @xas was in an error state, it will remain in an error state + * and this function will return %NULL. If the @xas has never been walked, + * it will have the effect of calling xas_load(). Otherwise one will be + * added to the index and the state will be walked to the correct + * location in the array for the next operation. + * + * If the iterator was referencing index %ULONG_MAX, this function wraps + * around to 0. + * + * Return: The entry at the new index. This may be %NULL or an internal + * entry. + */ +static inline void *xas_next(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset == XA_CHUNK_MASK)) + return __xas_next(xas); + + xas->xa_index++; + xas->xa_offset++; + return xa_entry(xas->xa, node, xas->xa_offset); +} +#endif /* HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY */ + +#endif /* _LINUX_XARRAY_H */ diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in index 5d8737a..c95d6fc 100644 --- a/libcfs/libcfs/Makefile.in +++ b/libcfs/libcfs/Makefile.in @@ -4,6 +4,7 @@ libcfs-linux-objs := linux-prim.o libcfs-linux-objs += linux-curproc.o libcfs-linux-objs += linux-hash.o libcfs-linux-objs += linux-wait.o +libcfs-linux-objs += xarray.o libcfs-crypto-objs := crypto.o fname.o hkdf.o hooks.o keyring.o libcfs-crypto-objs += keysetup.o keysetup_v1.o policy.o bio.o diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am index 273f97d..cb1d0d2 100644 --- a/libcfs/libcfs/linux/Makefile.am +++ b/libcfs/libcfs/linux/Makefile.am @@ -1,4 +1,5 @@ EXTRA_DIST = linux-prim.c \ linux-curproc.c \ linux-hash.c \ - linux-wait.c + linux-wait.c \ + xarray.c diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c index 3018a71..76b4ca9 100644 --- a/libcfs/libcfs/linux/linux-prim.c +++ b/libcfs/libcfs/linux/linux-prim.c @@ -114,7 +114,7 @@ int cfs_apply_workqueue_attrs(struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(cfs_apply_workqueue_attrs); -struct kmem_cache (*cfs_radix_tree_node_cachep); +struct kmem_cache (*radix_tree_node_cachep); void __init cfs_arch_init(void) { @@ -123,6 +123,8 @@ void __init cfs_arch_init(void) #endif cfs_apply_workqueue_attrs_t = (void *)kallsyms_lookup_name("apply_workqueue_attrs"); + radix_tree_node_cachep = + (void *)kallsyms_lookup_name("radix_tree_node_cachep"); } int cfs_kernel_write(struct file *filp, const void *buf, size_t count, diff --git a/libcfs/libcfs/linux/xarray.c b/libcfs/libcfs/linux/xarray.c new file mode 100644 index 0000000..95063fb --- /dev/null +++ b/libcfs/libcfs/linux/xarray.c @@ -0,0 +1,2101 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * XArray implementation + * Copyright (c) 2017 Microsoft Corporation + * Author: Matthew Wilcox + * + * This is taken from kernel commit: + * + * 7b785645e ("mm: fix page cache convergence regression") + * + * at kernel verison 5.2-rc2 + */ +#ifdef HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY +#include +#include +#include +#include +#include +#include + +/* + * Coding conventions in this file: + * + * @xa is used to refer to the entire xarray. + * @xas is the 'xarray operation state'. It may be either a pointer to + * an xa_state, or an xa_state stored on the stack. This is an unfortunate + * ambiguity. + * @index is the index of the entry being operated on + * @mark is an xa_mark_t; a small number indicating one of the mark bits. + * @node refers to an xa_node; usually the primary one being operated on by + * this function. + * @offset is the index into the slots array inside an xa_node. + * @parent refers to the @xa_node closer to the head than @node. + * @entry refers to something stored in a slot in the xarray + */ + +static inline unsigned int xa_lock_type(const struct xarray *xa) +{ + return (__force unsigned int)xa->xa_flags & 3; +} + +static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_lock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_lock_bh(xas); + else + xas_lock(xas); +} + +static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_unlock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_unlock_bh(xas); + else + xas_unlock(xas); +} + +static inline bool xa_track_free(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_TRACK_FREE; +} + +static inline bool xa_zero_busy(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_ZERO_BUSY; +} + +static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) +{ + if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) + xa->xa_flags |= XA_FLAGS_MARK(mark); +} + +static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) +{ + if (xa->xa_flags & XA_FLAGS_MARK(mark)) + xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); +} + +static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) +{ + return node->marks[(__force unsigned)mark]; +} + +static inline bool node_get_mark(struct xa_node *node, + unsigned int offset, xa_mark_t mark) +{ + return test_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_set_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_set_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_clear_bit(offset, node_marks(node, mark)); +} + +static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) +{ + return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); +} + +static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) +{ + bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); +} + +#define mark_inc(mark) do { \ + mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ +} while (0) + +/* + * xas_squash_marks() - Merge all marks to the first entry + * @xas: Array operation state. + * + * Set a mark on the first entry if any entry has it set. Clear marks on + * all sibling entries. + */ +static void xas_squash_marks(const struct xa_state *xas) +{ + unsigned int mark = 0; + unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; + + if (!xas->xa_sibs) + return; + + do { + unsigned long *marks = xas->xa_node->marks[mark]; + if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) + continue; + __set_bit(xas->xa_offset, marks); + bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); + } while (mark++ != (__force unsigned)XA_MARK_MAX); +} + +/* extracts the offset within this node from the index */ +static unsigned int get_offset(unsigned long index, struct xa_node *node) +{ + return (index >> node->shift) & XA_CHUNK_MASK; +} + +static void xas_set_offset(struct xa_state *xas) +{ + xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); +} + +/* move the index either forwards (find) or backwards (sibling slot) */ +static void xas_move_index(struct xa_state *xas, unsigned long offset) +{ + unsigned int shift = xas->xa_node->shift; + xas->xa_index &= ~XA_CHUNK_MASK << shift; + xas->xa_index += offset << shift; +} + +static void xas_advance(struct xa_state *xas) +{ + xas->xa_offset++; + xas_move_index(xas, xas->xa_offset); +} + +static void *set_bounds(struct xa_state *xas) +{ + xas->xa_node = XAS_BOUNDS; + return NULL; +} + +/* + * Starts a walk. If the @xas is already valid, we assume that it's on + * the right path and just return where we've got to. If we're in an + * error state, return NULL. If the index is outside the current scope + * of the xarray, return NULL without changing @xas->xa_node. Otherwise + * set @xas->xa_node to NULL and return the current head of the array. + */ +static void *xas_start(struct xa_state *xas) +{ + void *entry; + + if (xas_valid(xas)) + return xas_reload(xas); + if (xas_error(xas)) + return NULL; + + entry = xa_head(xas->xa); + if (!xa_is_node(entry)) { + if (xas->xa_index) + return set_bounds(xas); + } else { + if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) + return set_bounds(xas); + } + + xas->xa_node = NULL; + return entry; +} + +static void *xas_descend(struct xa_state *xas, struct xa_node *node) +{ + unsigned int offset = get_offset(xas->xa_index, node); + void *entry = xa_entry(xas->xa, node, offset); + + xas->xa_node = node; + if (xa_is_sibling(entry)) { + offset = xa_to_sibling(entry); + entry = xa_entry(xas->xa, node, offset); + } + + xas->xa_offset = offset; + return entry; +} + +/** + * xas_load() - Load an entry from the XArray (advanced). + * @xas: XArray operation state. + * + * Usually walks the @xas to the appropriate state to load the entry + * stored at xa_index. However, it will do nothing and return %NULL if + * @xas is in an error state. xas_load() will never expand the tree. + * + * If the xa_state is set up to operate on a multi-index entry, xas_load() + * may return %NULL or an internal entry, even if there are entries + * present within the range specified by @xas. + * + * Context: Any context. The caller should hold the xa_lock or the RCU lock. + * Return: Usually an entry in the XArray, but see description for exceptions. + */ +void *xas_load(struct xa_state *xas) +{ + void *entry = xas_start(xas); + + while (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + + if (xas->xa_shift > node->shift) + break; + entry = xas_descend(xas, node); + if (node->shift == 0) + break; + } + return entry; +} +EXPORT_SYMBOL_GPL(xas_load); + +/* Move the radix tree node cache here */ +extern struct kmem_cache *radix_tree_node_cachep; + +static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __clear_bit(offset, node->tags[tag]); +} + +static void radix_tree_node_rcu_free(struct rcu_head *head) +{ + struct radix_tree_node *node = + container_of(head, struct radix_tree_node, rcu_head); + int i; + + /* + * must only free zeroed nodes into the slab. radix_tree_shrink + * can leave us with a non-NULL entry in the first slot, so clear + * that here to make sure. + */ + for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) + tag_clear(node, i, 0); + + node->slots[0] = NULL; + node->count = 0; + + kmem_cache_free(radix_tree_node_cachep, node); +} + +#define XA_RCU_FREE ((struct xarray *)1) + +static void xa_node_free(struct xa_node *node) +{ + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->array = XA_RCU_FREE; + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); +} + +/* + * xas_destroy() - Free any resources allocated during the XArray operation. + * @xas: XArray operation state. + * + * This function is now internal-only. + */ +static void xas_destroy(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_alloc; + + if (!node) + return; + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + kmem_cache_free(radix_tree_node_cachep, node); + xas->xa_alloc = NULL; +} + +/** + * xas_nomem() - Allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * If we need to add new nodes to the XArray, we try to allocate memory + * with GFP_NOWAIT while holding the lock, which will usually succeed. + * If it fails, @xas is flagged as needing memory to continue. The caller + * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, + * the caller should retry the operation. + * + * Forward progress is guaranteed as one node is allocated here and + * stored in the xa_state where it will be found by xas_alloc(). More + * nodes will likely be found in the slab allocator, but we do not tie + * them up here. + * + * Return: true if memory was needed, and was successfully allocated. + */ +bool xas_nomem(struct xa_state *xas, gfp_t gfp) +{ + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } +#ifdef __GFP_ACCOUNT + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; +#endif + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} +EXPORT_SYMBOL_GPL(xas_nomem); + +/* + * __xas_nomem() - Drop locks and allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * Internal variant of xas_nomem(). + * + * Return: true if memory was needed, and was successfully allocated. + */ +static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) + __must_hold(xas->xa->xa_lock) +{ + unsigned int lock_type = xa_lock_type(xas->xa); + + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } +#ifdef __GFP_ACCOUNT + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; +#endif + if (gfpflags_allow_blocking(gfp)) { + xas_unlock_type(xas, lock_type); + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas_lock_type(xas, lock_type); + } else { + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + } + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} + +static void xas_update(struct xa_state *xas, struct xa_node *node) +{ + if (xas->xa_update) + xas->xa_update(node); + else + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); +} + +static void *xas_alloc(struct xa_state *xas, unsigned int shift) +{ + struct xa_node *parent = xas->xa_node; + struct xa_node *node = xas->xa_alloc; + + if (xas_invalid(xas)) + return NULL; + + if (node) { + xas->xa_alloc = NULL; + } else { + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; +#ifdef __GFP_ACCOUNT + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; +#endif + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!node) { + xas_set_err(xas, -ENOMEM); + return NULL; + } + } + + if (parent) { + node->offset = xas->xa_offset; + parent->count++; + XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE); + xas_update(xas, parent); + } + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->shift = shift; + node->count = 0; + node->nr_values = 0; + RCU_INIT_POINTER(node->parent, xas->xa_node); + node->array = xas->xa; + + return node; +} + +#ifdef CONFIG_XARRAY_MULTI +/* Returns the number of indices covered by a given xa_state */ +static unsigned long xas_size(const struct xa_state *xas) +{ + return (xas->xa_sibs + 1UL) << xas->xa_shift; +} +#endif + +/* + * Use this to calculate the maximum index that will need to be created + * in order to add the entry described by @xas. Because we cannot store a + * multiple-index entry at index 0, the calculation is a little more complex + * than you might expect. + */ +static unsigned long xas_max(struct xa_state *xas) +{ + unsigned long max = xas->xa_index; + +#ifdef CONFIG_XARRAY_MULTI + if (xas->xa_shift || xas->xa_sibs) { + unsigned long mask = xas_size(xas) - 1; + max |= mask; + if (mask == max) + max++; + } +#endif + + return max; +} + +/* The maximum index that can be contained in the array without expanding it */ +static unsigned long max_index(void *entry) +{ + if (!xa_is_node(entry)) + return 0; + return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; +} + +static void xas_shrink(struct xa_state *xas) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = xas->xa_node; + + for (;;) { + void *entry; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count != 1) + break; + entry = xa_entry_locked(xa, node, 0); + if (!entry) + break; + if (!xa_is_node(entry) && node->shift) + break; + if (xa_is_zero(entry) && xa_zero_busy(xa)) + entry = NULL; + xas->xa_node = XAS_BOUNDS; + + RCU_INIT_POINTER(xa->xa_head, entry); + if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK)) + xa_mark_clear(xa, XA_FREE_MARK); + + node->count = 0; + node->nr_values = 0; + if (!xa_is_node(entry)) + RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY); + xas_update(xas, node); + xa_node_free(node); + if (!xa_is_node(entry)) + break; + node = xa_to_node(entry); + node->parent = NULL; + } +} + +/* + * xas_delete_node() - Attempt to delete an xa_node + * @xas: Array operation state. + * + * Attempts to delete the @xas->xa_node. This will fail if xa->node has + * a non-zero reference count. + */ +static void xas_delete_node(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + for (;;) { + struct xa_node *parent; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count) + break; + + parent = xa_parent_locked(xas->xa, node); + xas->xa_node = parent; + xas->xa_offset = node->offset; + xa_node_free(node); + + if (!parent) { + xas->xa->xa_head = NULL; + xas->xa_node = XAS_BOUNDS; + return; + } + + parent->slots[xas->xa_offset] = NULL; + parent->count--; + XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE); + node = parent; + xas_update(xas, node); + } + + if (!node->parent) + xas_shrink(xas); +} + +/** + * xas_free_nodes() - Free this node and all nodes that it references + * @xas: Array operation state. + * @top: Node to free + * + * This node has been removed from the tree. We must now free it and all + * of its subnodes. There may be RCU walkers with references into the tree, + * so we must replace all entries with retry markers. + */ +static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) +{ + unsigned int offset = 0; + struct xa_node *node = top; + + for (;;) { + void *entry = xa_entry_locked(xas->xa, node, offset); + + if (node->shift && xa_is_node(entry)) { + node = xa_to_node(entry); + offset = 0; + continue; + } + if (entry) + RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY); + offset++; + while (offset == XA_CHUNK_SIZE) { + struct xa_node *parent; + + parent = xa_parent_locked(xas->xa, node); + offset = node->offset + 1; + node->count = 0; + node->nr_values = 0; + xas_update(xas, node); + xa_node_free(node); + if (node == top) + return; + node = parent; + } + } +} + +/* + * xas_expand adds nodes to the head of the tree until it has reached + * sufficient height to be able to contain @xas->xa_index + */ +static int xas_expand(struct xa_state *xas, void *head) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = NULL; + unsigned int shift = 0; + unsigned long max = xas_max(xas); + + if (!head) { + if (max == 0) + return 0; + while ((max >> shift) >= XA_CHUNK_SIZE) + shift += XA_CHUNK_SHIFT; + return shift + XA_CHUNK_SHIFT; + } else if (xa_is_node(head)) { + node = xa_to_node(head); + shift = node->shift + XA_CHUNK_SHIFT; + } + xas->xa_node = NULL; + + while (max > max_index(head)) { + xa_mark_t mark = 0; + + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + node = xas_alloc(xas, shift); + if (!node) + return -ENOMEM; + + node->count = 1; + if (xa_is_value(head)) + node->nr_values = 1; + RCU_INIT_POINTER(node->slots[0], head); + + /* Propagate the aggregated mark info to the new child */ + for (;;) { + if (xa_track_free(xa) && mark == XA_FREE_MARK) { + node_mark_all(node, XA_FREE_MARK); + if (!xa_marked(xa, XA_FREE_MARK)) { + node_clear_mark(node, 0, XA_FREE_MARK); + xa_mark_set(xa, XA_FREE_MARK); + } + } else if (xa_marked(xa, mark)) { + node_set_mark(node, 0, mark); + } + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + /* + * Now that the new node is fully initialised, we can add + * it to the tree + */ + if (xa_is_node(head)) { + xa_to_node(head)->offset = 0; + rcu_assign_pointer(xa_to_node(head)->parent, node); + } + head = xa_mk_node(node); + rcu_assign_pointer(xa->xa_head, head); + xas_update(xas, node); + + shift += XA_CHUNK_SHIFT; + } + + xas->xa_node = node; + return shift; +} + +/* + * xas_create() - Create a slot to store an entry in. + * @xas: XArray operation state. + * @allow_root: %true if we can store the entry in the root directly + * + * Most users will not need to call this function directly, as it is called + * by xas_store(). It is useful for doing conditional store operations + * (see the xa_cmpxchg() implementation for an example). + * + * Return: If the slot already existed, returns the contents of this slot. + * If the slot was newly created, returns %NULL. If it failed to create the + * slot, returns %NULL and indicates the error in @xas. + */ +static void *xas_create(struct xa_state *xas, bool allow_root) +{ + struct xarray *xa = xas->xa; + void *entry; + void __rcu **slot; + struct xa_node *node = xas->xa_node; + int shift; + unsigned int order = xas->xa_shift; + + if (xas_top(node)) { + entry = xa_head_locked(xa); + xas->xa_node = NULL; + if (!entry && xa_zero_busy(xa)) + entry = XA_ZERO_ENTRY; + shift = xas_expand(xas, entry); + if (shift < 0) + return NULL; + if (!shift && !allow_root) + shift = XA_CHUNK_SHIFT; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } else if (xas_error(xas)) { + return NULL; + } else if (node) { + unsigned int offset = xas->xa_offset; + + shift = node->shift; + entry = xa_entry_locked(xa, node, offset); + slot = &node->slots[offset]; + } else { + shift = 0; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } + + while (shift > order) { + shift -= XA_CHUNK_SHIFT; + if (!entry) { + node = xas_alloc(xas, shift); + if (!node) + break; + if (xa_track_free(xa)) + node_mark_all(node, XA_FREE_MARK); + rcu_assign_pointer(*slot, xa_mk_node(node)); + } else if (xa_is_node(entry)) { + node = xa_to_node(entry); + } else { + break; + } + entry = xas_descend(xas, node); + slot = &node->slots[xas->xa_offset]; + } + + return entry; +} + +/** + * xas_create_range() - Ensure that stores to this range will succeed + * @xas: XArray operation state. + * + * Creates all of the slots in the range covered by @xas. Sets @xas to + * create single-index entries and positions it at the beginning of the + * range. This is for the benefit of users which have not yet been + * converted to use multi-index entries. + */ +void xas_create_range(struct xa_state *xas) +{ + unsigned long index = xas->xa_index; + unsigned char shift = xas->xa_shift; + unsigned char sibs = xas->xa_sibs; + + xas->xa_index |= ((sibs + 1) << shift) - 1; + if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) + xas->xa_offset |= sibs; + xas->xa_shift = 0; + xas->xa_sibs = 0; + + for (;;) { + xas_create(xas, true); + if (xas_error(xas)) + goto restore; + if (xas->xa_index <= (index | XA_CHUNK_MASK)) + goto success; + xas->xa_index -= XA_CHUNK_SIZE; + + for (;;) { + struct xa_node *node = xas->xa_node; + xas->xa_node = xa_parent_locked(xas->xa, node); + xas->xa_offset = node->offset - 1; + if (node->offset != 0) + break; + } + } + +restore: + xas->xa_shift = shift; + xas->xa_sibs = sibs; + xas->xa_index = index; + return; +success: + xas->xa_index = index; + if (xas->xa_node) + xas_set_offset(xas); +} +EXPORT_SYMBOL_GPL(xas_create_range); + +static void update_node(struct xa_state *xas, struct xa_node *node, + int count, int values) +{ + if (!node || (!count && !values)) + return; + + node->count += count; + node->nr_values += values; + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE); + xas_update(xas, node); + if (count < 0) + xas_delete_node(xas); +} + +/** + * xas_store() - Store this entry in the XArray. + * @xas: XArray operation state. + * @entry: New entry. + * + * If @xas is operating on a multi-index entry, the entry returned by this + * function is essentially meaningless (it may be an internal entry or it + * may be %NULL, even if there are non-NULL entries at some of the indices + * covered by the range). This is not a problem for any current users, + * and can be changed if needed. + * + * Return: The old entry at this index. + */ +void *xas_store(struct xa_state *xas, void *entry) +{ + struct xa_node *node; + void __rcu **slot = &xas->xa->xa_head; + unsigned int offset, max; + int count = 0; + int values = 0; + void *first, *next; + bool value = xa_is_value(entry); + + if (entry) { + bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry); + first = xas_create(xas, allow_root); + } else { + first = xas_load(xas); + } + + if (xas_invalid(xas)) + return first; + node = xas->xa_node; + if (node && (xas->xa_shift < node->shift)) + xas->xa_sibs = 0; + if ((first == entry) && !xas->xa_sibs) + return first; + + next = first; + offset = xas->xa_offset; + max = xas->xa_offset + xas->xa_sibs; + if (node) { + slot = &node->slots[offset]; + if (xas->xa_sibs) + xas_squash_marks(xas); + } + if (!entry) + xas_init_marks(xas); + + for (;;) { + /* + * Must clear the marks before setting the entry to NULL, + * otherwise xas_for_each_marked may find a NULL entry and + * stop early. rcu_assign_pointer contains a release barrier + * so the mark clearing will appear to happen before the + * entry is set to NULL. + */ + rcu_assign_pointer(*slot, entry); + if (xa_is_node(next) && (!node || node->shift)) + xas_free_nodes(xas, xa_to_node(next)); + if (!node) + break; + count += !next - !entry; + values += !xa_is_value(first) - !value; + if (entry) { + if (offset == max) + break; + if (!xa_is_sibling(entry)) + entry = xa_mk_sibling(xas->xa_offset); + } else { + if (offset == XA_CHUNK_MASK) + break; + } + next = xa_entry_locked(xas->xa, node, ++offset); + if (!xa_is_sibling(next)) { + if (!entry && (offset > max)) + break; + first = next; + } + slot++; + } + + update_node(xas, node, count, values); + return first; +} +EXPORT_SYMBOL_GPL(xas_store); + +/** + * xas_get_mark() - Returns the state of this mark. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Return: true if the mark is set, false if the mark is clear or @xas + * is in an error state. + */ +bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) +{ + if (xas_invalid(xas)) + return false; + if (!xas->xa_node) + return xa_marked(xas->xa, mark); + return node_get_mark(xas->xa_node, xas->xa_offset, mark); +} +EXPORT_SYMBOL_GPL(xas_get_mark); + +/** + * xas_set_mark() - Sets the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Sets the specified mark on this entry, and walks up the tree setting it + * on all the ancestor entries. Does nothing if @xas has not been walked to + * an entry, or is in an error state. + */ +void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (node_set_mark(node, offset, mark)) + return; + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (!xa_marked(xas->xa, mark)) + xa_mark_set(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_set_mark); + +/** + * xas_clear_mark() - Clears the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Clears the specified mark on this entry, and walks back to the head + * attempting to clear it on all the ancestor entries. Does nothing if + * @xas has not been walked to an entry, or is in an error state. + */ +void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (!node_clear_mark(node, offset, mark)) + return; + if (node_any_mark(node, mark)) + return; + + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (xa_marked(xas->xa, mark)) + xa_mark_clear(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_clear_mark); + +/** + * xas_init_marks() - Initialise all marks for the entry + * @xas: Array operations state. + * + * Initialise all marks for the entry specified by @xas. If we're tracking + * free entries with a mark, we need to set it on all entries. All other + * marks are cleared. + * + * This implementation is not as efficient as it could be; we may walk + * up the tree multiple times. + */ +void xas_init_marks(const struct xa_state *xas) +{ + xa_mark_t mark = 0; + + for (;;) { + if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) + xas_set_mark(xas, mark); + else + xas_clear_mark(xas, mark); + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} +EXPORT_SYMBOL_GPL(xas_init_marks); + +/** + * xas_pause() - Pause a walk to drop a lock. + * @xas: XArray operation state. + * + * Some users need to pause a walk and drop the lock they're holding in + * order to yield to a higher priority thread or carry out an operation + * on an entry. Those users should call this function before they drop + * the lock. It resets the @xas to be suitable for the next iteration + * of the loop after the user has reacquired the lock. If most entries + * found during a walk require you to call xas_pause(), the xa_for_each() + * iterator may be more appropriate. + * + * Note that xas_pause() only works for forward iteration. If a user needs + * to pause a reverse iteration, we will need a xas_pause_rev(). + */ +void xas_pause(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (xas_invalid(xas)) + return; + + if (node) { + unsigned int offset = xas->xa_offset; + while (++offset < XA_CHUNK_SIZE) { + if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) + break; + } + xas->xa_index += (offset - xas->xa_offset) << node->shift; + } else { + xas->xa_index++; + } + xas->xa_node = XAS_RESTART; +} +EXPORT_SYMBOL_GPL(xas_pause); + +/* + * __xas_prev() - Find the previous entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_prev() which handles all the complex cases + * out of line. + */ +void *__xas_prev(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index--; + if (!xas->xa_node) + return set_bounds(xas); + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset--; + + while (xas->xa_offset == 255) { + xas->xa_offset = xas->xa_node->offset - 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_prev); + +/* + * __xas_next() - Find the next entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_next() which handles all the complex cases + * out of line. + */ +void *__xas_next(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index++; + if (!xas->xa_node) + return set_bounds(xas); + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset++; + + while (xas->xa_offset == XA_CHUNK_SIZE) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_next); + +/** + * xas_find() - Find the next present entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * + * If the @xas has not yet been walked to an entry, return the entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we move to the + * next entry. + * + * If no entry is found and the array is smaller than @max, the iterator + * is set to the smallest index not yet in the array. This allows @xas + * to be immediately passed to xas_store(). + * + * Return: The entry, if found, otherwise %NULL. + */ +void *xas_find(struct xa_state *xas, unsigned long max) +{ + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + return set_bounds(xas); + } else if (xas_top(xas->xa_node)) { + entry = xas_load(xas); + if (entry || xas_not_node(xas->xa_node)) + return entry; + } else if (!xas->xa_node->shift && + xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) { + xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; + } + + xas_advance(xas); + + while (xas->xa_node && (xas->xa_index <= max)) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_node(entry)) { + xas->xa_node = xa_to_node(entry); + xas->xa_offset = 0; + continue; + } + if (entry && !xa_is_sibling(entry)) + return entry; + + xas_advance(xas); + } + + if (!xas->xa_node) + xas->xa_node = XAS_BOUNDS; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find); + +/** + * xas_find_marked() - Find the next marked entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * @mark: Mark number to search for. + * + * If the @xas has not yet been walked to an entry, return the marked entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we return the + * first marked entry with an index > xas.xa_index. + * + * If no marked entry is found and the array is smaller than @max, @xas is + * set to the bounds state and xas->xa_index is set to the smallest index + * not yet in the array. This allows @xas to be immediately passed to + * xas_store(). + * + * If no entry is found before @max is reached, @xas is set to the restart + * state. + * + * Return: The entry, if found, otherwise %NULL. + */ +void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) +{ + bool advance = true; + unsigned int offset; + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + goto out; + } else if (xas_top(xas->xa_node)) { + advance = false; + entry = xa_head(xas->xa); + xas->xa_node = NULL; + if (xas->xa_index > max_index(entry)) + goto out; + if (!xa_is_node(entry)) { + if (xa_marked(xas->xa, mark)) + return entry; + xas->xa_index = 1; + goto out; + } + xas->xa_node = xa_to_node(entry); + xas->xa_offset = xas->xa_index >> xas->xa_node->shift; + } + + while (xas->xa_index <= max) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + advance = false; + continue; + } + + if (!advance) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_sibling(entry)) { + xas->xa_offset = xa_to_sibling(entry); + xas_move_index(xas, xas->xa_offset); + } + } + + offset = xas_find_chunk(xas, advance, mark); + if (offset > xas->xa_offset) { + advance = false; + xas_move_index(xas, offset); + /* Mind the wrap */ + if ((xas->xa_index - 1) >= max) + goto max; + xas->xa_offset = offset; + if (offset == XA_CHUNK_SIZE) + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } + +out: + if (xas->xa_index > max) + goto max; + return set_bounds(xas); +max: + xas->xa_node = XAS_RESTART; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_marked); + +/** + * xas_find_conflict() - Find the next present entry in a range. + * @xas: XArray operation state. + * + * The @xas describes both a range and a position within that range. + * + * Context: Any context. Expects xa_lock to be held. + * Return: The next entry in the range covered by @xas or %NULL. + */ +void *xas_find_conflict(struct xa_state *xas) +{ + void *curr; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) + return NULL; + + if (xas_top(xas->xa_node)) { + curr = xas_start(xas); + if (!curr) + return NULL; + while (xa_is_node(curr)) { + struct xa_node *node = xa_to_node(curr); + curr = xas_descend(xas, node); + } + if (curr) + return curr; + } + + if (xas->xa_node->shift > xas->xa_shift) + return NULL; + + for (;;) { + if (xas->xa_node->shift == xas->xa_shift) { + if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs) + break; + } else if (xas->xa_offset == XA_CHUNK_MASK) { + xas->xa_offset = xas->xa_node->offset; + xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + continue; + } + curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset); + if (xa_is_sibling(curr)) + continue; + while (xa_is_node(curr)) { + xas->xa_node = xa_to_node(curr); + xas->xa_offset = 0; + curr = xa_entry_locked(xas->xa, xas->xa_node, 0); + } + if (curr) + return curr; + } + xas->xa_offset -= xas->xa_sibs; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_conflict); + +/** + * xa_load() - Load an entry from an XArray. + * @xa: XArray. + * @index: index into array. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry at @index in @xa. + */ +void *xa_load(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + do { + entry = xas_load(&xas); + if (xa_is_zero(entry)) + entry = NULL; + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + return entry; +} +EXPORT_SYMBOL(xa_load); + +static void *xas_result(struct xa_state *xas, void *curr) +{ + if (xa_is_zero(curr)) + return NULL; + if (xas_error(xas)) + curr = xas->xa_node; + return curr; +} + +/** + * __xa_erase() - Erase this entry from the XArray while locked. + * @xa: XArray. + * @index: Index into array. + * + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. + * + * Context: Any context. Expects xa_lock to be held on entry. + * Return: The entry which used to be at this index. + */ +void *__xa_erase(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + return xas_result(&xas, xas_store(&xas, NULL)); +} +EXPORT_SYMBOL(__xa_erase); + +/** + * xa_erase() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. + * + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. + * + * Context: Any context. Takes and releases the xa_lock. + * Return: The entry which used to be at this index. + */ +void *xa_erase(struct xarray *xa, unsigned long index) +{ + void *entry; + + xa_lock(xa); + entry = __xa_erase(xa, index); + xa_unlock(xa); + + return entry; +} +EXPORT_SYMBOL(xa_erase); + +/** + * __xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return XA_ERROR(-EINVAL); + if (xa_track_free(xa) && !entry) + entry = XA_ZERO_ENTRY; + + do { + curr = xas_store(&xas, entry); + if (xa_track_free(xa)) + xas_clear_mark(&xas, XA_FREE_MARK); + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_store); + +/** + * xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from this index will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with @index are unaffected unless @entry is %NULL. + * + * Context: Any context. Takes and releases the xa_lock. + * May sleep if the @gfp flags permit. + * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry + * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation + * failed. + */ +void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + void *curr; + + xa_lock(xa); + curr = __xa_store(xa, index, entry, gfp); + xa_unlock(xa); + + return curr; +} +EXPORT_SYMBOL(xa_store); + +/** + * __xa_cmpxchg() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return XA_ERROR(-EINVAL); + + do { + curr = xas_load(&xas); + if (curr == old) { + xas_store(&xas, entry); + if (xa_track_free(xa) && entry && !curr) + xas_clear_mark(&xas, XA_FREE_MARK); + } + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_cmpxchg); + +/** + * __xa_insert() - Store this entry in the XArray if no entry is present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + if (!entry) + entry = XA_ZERO_ENTRY; + + do { + curr = xas_load(&xas); + if (!curr) { + xas_store(&xas, entry); + if (xa_track_free(xa)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + xas_set_err(&xas, -EBUSY); + } + } while (__xas_nomem(&xas, gfp)); + + return xas_error(&xas); +} +EXPORT_SYMBOL(__xa_insert); + +#ifdef CONFIG_XARRAY_MULTI +static void xas_set_range(struct xa_state *xas, unsigned long first, + unsigned long last) +{ + unsigned int shift = 0; + unsigned long sibs = last - first; + unsigned int offset = XA_CHUNK_MASK; + + xas_set(xas, first); + + while ((first & XA_CHUNK_MASK) == 0) { + if (sibs < XA_CHUNK_MASK) + break; + if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK)) + break; + shift += XA_CHUNK_SHIFT; + if (offset == XA_CHUNK_MASK) + offset = sibs & XA_CHUNK_MASK; + sibs >>= XA_CHUNK_SHIFT; + first >>= XA_CHUNK_SHIFT; + } + + offset = first & XA_CHUNK_MASK; + if (offset + sibs > XA_CHUNK_MASK) + sibs = XA_CHUNK_MASK - offset; + if ((((first + sibs + 1) << shift) - 1) > last) + sibs -= 1; + + xas->xa_shift = shift; + xas->xa_sibs = sibs; +} + +/** + * xa_store_range() - Store this entry at a range of indices in the XArray. + * @xa: XArray. + * @first: First index to affect. + * @last: Last index to affect. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from any index between @first and @last, + * inclusive will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with @index are unaffected unless @entry is %NULL. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in + * an XArray, or xa_err(-ENOMEM) if memory allocation failed. + */ +void *xa_store_range(struct xarray *xa, unsigned long first, + unsigned long last, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + if (last < first) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + if (entry) { + unsigned int order = BITS_PER_LONG; + if (last + 1) + order = __ffs(last + 1); + xas_set_order(&xas, last, order); + xas_create(&xas, true); + if (xas_error(&xas)) + goto unlock; + } + do { + xas_set_range(&xas, first, last); + xas_store(&xas, entry); + if (xas_error(&xas)) + goto unlock; + first += xas_size(&xas); + } while (first <= last); +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, NULL); +} +EXPORT_SYMBOL(xa_store_range); +#endif /* CONFIG_XARRAY_MULTI */ + +/** + * __xa_alloc() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @limit: Range for allocated ID. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + if (WARN_ON_ONCE(!xa_track_free(xa))) + return -EINVAL; + + if (!entry) + entry = XA_ZERO_ENTRY; + + do { + xas.xa_index = limit.min; + xas_find_marked(&xas, limit.max, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + xas_set_err(&xas, -EBUSY); + else + *id = xas.xa_index; + xas_store(&xas, entry); + xas_clear_mark(&xas, XA_FREE_MARK); + } while (__xas_nomem(&xas, gfp)); + + return xas_error(&xas); +} +EXPORT_SYMBOL(__xa_alloc); + +/** + * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + u32 min = limit.min; + int ret; + + limit.min = max(min, *next); + ret = __xa_alloc(xa, id, entry, limit, gfp); + if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { + xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + + if (ret < 0 && limit.min > min) { + limit.min = min; + ret = __xa_alloc(xa, id, entry, limit, gfp); + if (ret == 0) + ret = 1; + } + + if (ret >= 0) { + *next = *id + 1; + if (*next == 0) + xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; + } + return ret; +} +EXPORT_SYMBOL(__xa_alloc_cyclic); + +/** + * __xa_set_mark() - Set this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a %NULL entry does not succeed. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_set_mark(&xas, mark); +} +EXPORT_SYMBOL(__xa_set_mark); + +/** + * __xa_clear_mark() - Clear this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_clear_mark(&xas, mark); +} +EXPORT_SYMBOL(__xa_clear_mark); + +/** + * xa_get_mark() - Inquire whether this mark is set on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * This function uses the RCU read lock, so the result may be out of date + * by the time it returns. If you need the result to be stable, use a lock. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: True if the entry at @index has this mark set, false if it doesn't. + */ +bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + entry = xas_start(&xas); + while (xas_get_mark(&xas, mark)) { + if (!xa_is_node(entry)) + goto found; + entry = xas_descend(&xas, xa_to_node(entry)); + } + rcu_read_unlock(); + return false; + found: + rcu_read_unlock(); + return true; +} +EXPORT_SYMBOL(xa_get_mark); + +/** + * xa_set_mark() - Set this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a %NULL entry does not succeed. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_set_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_set_mark); + +/** + * xa_clear_mark() - Clear this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Clearing a mark always succeeds. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_clear_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_clear_mark); + +/** + * xa_find() - Search the XArray for an entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter, and has the lowest + * index that is at least @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. + * This function is protected by the RCU read lock, so it may not find + * entries which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry, if found, otherwise %NULL. + */ +void *xa_find(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp); + void *entry; + + rcu_read_lock(); + do { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find); + +/** + * xa_find_after() - Search the XArray for a present entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter and has the lowest + * index that is above @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. + * This function is protected by the RCU read lock, so it may miss entries + * which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The pointer, if found, otherwise %NULL. + */ +void *xa_find_after(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp + 1); + void *entry; + + rcu_read_lock(); + for (;;) { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + if (xas.xa_node == XAS_BOUNDS) + break; + if (xas.xa_shift) { + if (xas.xa_index & ((1UL << xas.xa_shift) - 1)) + continue; + } else { + if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK)) + continue; + } + if (!xas_retry(&xas, entry)) + break; + } + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find_after); + +static unsigned int xas_extract_present(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each(xas, entry, max) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n, xa_mark_t mark) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each_marked(xas, entry, max, mark) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +/** + * xa_extract() - Copy selected entries from the XArray into a normal array. + * @xa: The source XArray to copy from. + * @dst: The buffer to copy entries into. + * @start: The first index in the XArray eligible to be selected. + * @max: The last index in the XArray eligible to be selected. + * @n: The maximum number of entries to copy. + * @filter: Selection criterion. + * + * Copies up to @n entries that match @filter from the XArray. The + * copied entries will have indices between @start and @max, inclusive. + * + * The @filter may be an XArray mark value, in which case entries which are + * marked with that mark will be copied. It may also be %XA_PRESENT, in + * which case all entries which are not %NULL will be copied. + * + * The entries returned may not represent a snapshot of the XArray at a + * moment in time. For example, if another thread stores to index 5, then + * index 10, calling xa_extract() may return the old contents of index 5 + * and the new contents of index 10. Indices not modified while this + * function is running will not be skipped. + * + * If you need stronger guarantees, holding the xa_lock across calls to this + * function will prevent concurrent modification. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The number of entries copied. + */ +unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, + unsigned long max, unsigned int n, xa_mark_t filter) +{ + XA_STATE(xas, xa, start); + + if (!n) + return 0; + + if ((__force unsigned int)filter < XA_MAX_MARKS) + return xas_extract_marked(&xas, dst, max, n, filter); + return xas_extract_present(&xas, dst, max, n); +} +EXPORT_SYMBOL(xa_extract); + +/** + * xa_destroy() - Free all internal data structures. + * @xa: XArray. + * + * After calling this function, the XArray is empty and has freed all memory + * allocated for its internal data structures. You are responsible for + * freeing the objects referenced by the XArray. + * + * Context: Any context. Takes and releases the xa_lock, interrupt-safe. + */ +void xa_destroy(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + unsigned long flags; + void *entry; + + xas.xa_node = NULL; + xas_lock_irqsave(&xas, flags); + entry = xa_head_locked(xa); + RCU_INIT_POINTER(xa->xa_head, NULL); + xas_init_marks(&xas); + if (xa_zero_busy(xa)) + xa_mark_clear(xa, XA_FREE_MARK); + /* lockdep checks we're still holding the lock in xas_free_nodes() */ + if (xa_is_node(entry)) + xas_free_nodes(&xas, xa_to_node(entry)); + xas_unlock_irqrestore(&xas, flags); +} +EXPORT_SYMBOL(xa_destroy); + +#ifdef XA_DEBUG +void xa_dump_node(const struct xa_node *node) +{ + unsigned i, j; + + if (!node) + return; + if ((unsigned long)node & 3) { + pr_cont("node %px\n", node); + return; + } + + pr_cont("node %px %s %d parent %px shift %d count %d values %d " + "array %px list %px %px marks", + node, node->parent ? "offset" : "max", node->offset, + node->parent, node->shift, node->count, node->nr_values, + node->array, node->private_list.prev, node->private_list.next); + for (i = 0; i < XA_MAX_MARKS; i++) + for (j = 0; j < XA_MARK_LONGS; j++) + pr_cont(" %lx", node->marks[i][j]); + pr_cont("\n"); +} + +void xa_dump_index(unsigned long index, unsigned int shift) +{ + if (!shift) + pr_info("%lu: ", index); + else if (shift >= BITS_PER_LONG) + pr_info("0-%lu: ", ~0UL); + else + pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1)); +} + +void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) +{ + if (!entry) + return; + + xa_dump_index(index, shift); + + if (xa_is_node(entry)) { + if (shift == 0) { + pr_cont("%px\n", entry); + } else { + unsigned long i; + struct xa_node *node = xa_to_node(entry); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + xa_dump_entry(node->slots[i], + index + (i << node->shift), node->shift); + } + } else if (xa_is_value(entry)) + pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry), + xa_to_value(entry), entry); + else if (!xa_is_internal(entry)) + pr_cont("%px\n", entry); + else if (xa_is_retry(entry)) + pr_cont("retry (%ld)\n", xa_to_internal(entry)); + else if (xa_is_sibling(entry)) + pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); + else if (xa_is_zero(entry)) + pr_cont("zero (%ld)\n", xa_to_internal(entry)); + else + pr_cont("UNKNOWN ENTRY (%px)\n", entry); +} + +void xa_dump(const struct xarray *xa) +{ + void *entry = xa->xa_head; + unsigned int shift = 0; + + pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, + xa->xa_flags, xa_marked(xa, XA_MARK_0), + xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2)); + if (xa_is_node(entry)) + shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; + xa_dump_entry(entry, 0, shift); +} +#endif +#endif /* HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY */ diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 1a2e781..09240cb 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1967,41 +1967,25 @@ EXTRA_KCFLAGS="$tmp_flags" ]) # LC_INODE_TIMESPEC64 # -# LC_XA_IS_VALUE -# kernel 4.19-rc6 commit 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 -# xarray: Replace exceptional entries -# adds xa_is_value -# -AC_DEFUN([LC_XA_IS_VALUE], [ -LB_CHECK_COMPILE([xa_is_value exist], -xa_is_value, [ - #include -],[ - xa_is_value(NULL); -],[ - AC_DEFINE(HAVE_XA_IS_VALUE, 1, [xa_is_value exist]) -]) -]) # LC_XA_IS_VALUE - -# # LC___XA_SET_MARK # # kernel 4.20 commit v4.19-rc5-248-g9b89a0355144 -# xarray: Add XArray marks +# xarray: Add XArray marks - replaced radix_tree_tag_set # AC_DEFUN([LC___XA_SET_MARK], [ +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" LB_CHECK_COMPILE([if '__xa_set_mark' exists], __xa_set_mark, [ - #include #include + #include ],[ - struct xarray *xa = NULL; - - __xa_set_mark(xa, 0, PAGECACHE_TAG_DIRTY); + radix_tree_tag_set(NULL, 0, PAGECACHE_TAG_DIRTY); ],[ - AC_DEFINE(HAVE___XA_SET_MARK, 1, + AC_DEFINE(HAVE_RADIX_TREE_TAG_SET, 1, [__xa_set_mark exists]) ]) +EXTRA_KCFLAGS="$tmp_flags" ]) # LC___XA_SET_MARK # @@ -2343,7 +2327,6 @@ AC_DEFUN([LC_PROG_LINUX], [ # 4.18 LC_INODE_TIMESPEC64 - LC_XA_IS_VALUE # 4.20 LC___XA_SET_MARK diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index 3db844c..89b3ad1 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -141,11 +141,19 @@ static inline bool d_is_positive(const struct dentry *dentry) # define inode_trylock(inode) mutex_trylock(&(inode)->i_mutex) #endif -#ifndef HAVE_XA_IS_VALUE -static inline bool xa_is_value(void *entry) +/* Old kernels lacked both Xarray support and the page cache + * using Xarrays. Our back ported Xarray support introduces + * the real xa_is_value() but we need a wrapper as well for + * the page cache interaction. Lets keep xa_is_value() separate + * in old kernels for Xarray support and page cache handling. + */ +#ifdef HAVE_RADIX_TREE_EXCEPTIONAL_ENTRY +static inline bool ll_xa_is_value(void *entry) { return radix_tree_exceptional_entry(entry); } +#else +#define ll_xa_is_value xa_is_value #endif #ifndef HAVE_TRUNCATE_INODE_PAGES_FINAL @@ -492,13 +500,12 @@ static inline bool bdev_integrity_enabled(struct block_device *bdev, int rw) #ifdef HAVE_I_PAGES #define page_tree i_pages +#define ll_xa_lock_irqsave(lockp, flags) xa_lock_irqsave(lockp, flags) +#define ll_xa_unlock_irqrestore(lockp, flags) xa_unlock_irqrestore(lockp, flags) #else #define i_pages tree_lock -#endif - -#ifndef xa_lock_irqsave -#define xa_lock_irqsave(lockp, flags) spin_lock_irqsave(lockp, flags) -#define xa_unlock_irqrestore(lockp, flags) spin_unlock_irqrestore(lockp, flags) +#define ll_xa_lock_irqsave(lockp, flags) spin_lock_irqsave(lockp, flags) +#define ll_xa_unlock_irqrestore(lockp, flags) spin_unlock_irqrestore(lockp, flags) #endif #ifndef HAVE_LOCK_PAGE_MEMCG diff --git a/lustre/include/obd.h b/lustre/include/obd.h index f8d9cb0..e1a6379 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 6c6cc80..d2168de 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2263,9 +2263,9 @@ void ll_delete_inode(struct inode *inode) */ nrpages = mapping->nrpages; if (nrpages) { - xa_lock_irqsave(&mapping->i_pages, flags); + ll_xa_lock_irqsave(&mapping->i_pages, flags); nrpages = mapping->nrpages; - xa_unlock_irqrestore(&mapping->i_pages, flags); + ll_xa_unlock_irqrestore(&mapping->i_pages, flags); } /* Workaround end */ LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, " diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 98bc985..652940f 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -37,10 +37,10 @@ #define DEBUG_SUBSYSTEM S_LLITE - #include #include #include + #include "llite_internal.h" #include "vvp_internal.h" #include @@ -953,7 +953,7 @@ static inline void ll_account_page_dirtied(struct page *page, static inline void ll_page_tag_dirty(struct page *page, struct address_space *mapping) { -#ifdef HAVE___XA_SET_MARK +#ifndef HAVE_RADIX_TREE_TAG_SET __xa_set_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); #else radix_tree_tag_set(&mapping->page_tree, page_index(page), @@ -990,7 +990,7 @@ void vvp_set_pagevec_dirty(struct pagevec *pvec) page, (void *) page->private); /* Rest of code derived from __set_page_dirty_nobuffers */ - xa_lock_irqsave(&mapping->i_pages, flags); + ll_xa_lock_irqsave(&mapping->i_pages, flags); /* Notes on differences with __set_page_dirty_nobuffers: * 1. We don't need to call page_mapping because we know this is a page @@ -1020,7 +1020,7 @@ void vvp_set_pagevec_dirty(struct pagevec *pvec) dirtied++; unlock_page_memcg(page); } - xa_unlock_irqrestore(&mapping->i_pages, flags); + ll_xa_unlock_irqrestore(&mapping->i_pages, flags); CDEBUG(D_VFSTRACE, "mapping %p, count %d, dirtied %d\n", mapping, count, dirtied); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 750ed3d..4ea3302 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -543,9 +545,6 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md) #else #define mdc_unpack_acl(req, md) 0 #endif -#ifdef HAVE_XA_IS_VALUE -# include -#endif int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, struct obd_export *dt_exp, struct obd_export *md_exp, @@ -1142,14 +1141,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash, unsigned long flags; int found; - xa_lock_irqsave(&mapping->i_pages, flags); + ll_xa_lock_irqsave(&mapping->i_pages, flags); found = radix_tree_gang_lookup(&mapping->page_tree, (void **)&page, offset, 1); - if (found > 0 && !xa_is_value(page)) { + if (found > 0 && !ll_xa_is_value(page)) { struct lu_dirpage *dp; get_page(page); - xa_unlock_irqrestore(&mapping->i_pages, flags); + ll_xa_unlock_irqrestore(&mapping->i_pages, flags); /* * In contrast to find_lock_page() we are sure that directory * page cannot be truncated (while DLM lock is held) and, @@ -1198,7 +1197,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash, page = ERR_PTR(-EIO); } } else { - xa_unlock_irqrestore(&mapping->i_pages, flags); + ll_xa_unlock_irqrestore(&mapping->i_pages, flags); page = NULL; } return page; -- 1.8.3.1