Recent patches suggest that the locking in lu_context_exit() hurts
performance, as the changes they make are aimed at improving performance.
Let's go all the way and remove the locking completely.
The race of interest is between lu_context_exit() finalizing a
value with ->lct_exit, and lu_context_key_quiesce() freeing
the value with key_fini().
If lu_context_key_quiesce() has started, there is no need to
finalize the value - it can just be freed. So lu_context_exit()
is changed to skip the call to ->lct_exit if LCT_QUIESCENT is set.
If lu_context_exit() has started, lu_context_key_quiesce() must wait
for it to complete - it cannot just skip the freeing. To allow
this we introduce a new lc_state, LCS_LEAVING. This indicates that
->lct_exit might be called. Before calling key_fini() on a context,
lu_context_key_quiesce() waits (spinning) for lc_state to move on from
LCS_LEAVING.
Linux-commit:
ac3f8fd6e61b245fa9c14e3164203c1211c5ef6b
fix possible hang waiting for LCS_LEAVING
As lu_context_key_quiesce() spins waiting for LCS_LEAVING to
change, it is important that we set and then clear it within a
non-preemptible region. If the thread that spins preempts the
thread that sets-and-clears the state while the state is LCS_LEAVING,
then it can spin indefinitely, particularly on a single-CPU machine.
Also update the comment to explain this dependency.
Linux-commit:
4859716f66db6989ef4bf52434b5b1d813c6adc1
Change-Id: I92ef27304eab43518fcb216b9c9cb4875cc9b98c
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-on: https://review.whamcloud.com/32713
Tested-by: Jenkins
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
]) # LIBCFS_UUID_T
#
+# Kernel version 4.12-rc3 commit fd851a3cdc196bfc1d229b5f22369069af532bf8
+# introduced linux/processor.h
+#
+AC_DEFUN([LIBCFS_HAVE_PROCESSOR_HEADER], [
+LB_CHECK_LINUX_HEADER([linux/processor.h], [
+ AC_DEFINE(HAVE_PROCESSOR_H, 1,
+ [processor.h is present])])
+]) # LIBCFS_HAVE_PROCESSOR_HEADER
+
+
+#
# LIBCFS_WAIT_QUEUE_ENTRY
#
# Kernel version 4.13 ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f
LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
LIBCFS_SCHED_HEADERS
# 4.12
+LIBCFS_HAVE_PROCESSOR_HEADER
LIBCFS_UUID_T
# 4.13
LIBCFS_WAIT_QUEUE_ENTRY
EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \
- linux-list.h linux-hash.h linux-uuid.h linux-crypto.h
+ linux-list.h linux-hash.h linux-uuid.h linux-crypto.h \
+ processor.h
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive including nested uses
+ * of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelines will not cause a bug, but may cause sub
+ * optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detection of resource owner and decision to spin or sleep or guest-yield
+ * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be
+ * tested within the loop body.
+ */
+#ifndef spin_begin
+#ifdef CONFIG_PPC64
+#define spin_begin() HMT_low()
+#else
+#define spin_begin()
+#endif /* CONFIG_PPC64 */
+#endif /* spin_begin */
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#ifdef CONFIG_PPC64
+#define spin_end() HMT_medium()
+#else
+#define spin_end()
+#endif /* CONFIG_PPC64 */
+#endif /* spin_end */
+
+/*
+ * spin_until_cond can be used to wait for a condition to become true. It
+ * may be expected that the first iteration will be true in the common case
+ * (no spinning), so that callers should not require a first "likely" test
+ * for the uncontended case before using this primitive.
+ *
+ * Usage and implementation guidelines are the same as for the spin_begin
+ * primitives, above.
+ */
+#ifndef spin_until_cond
+#define spin_until_cond(cond) \
+do { \
+	if (unlikely(!(cond))) { \
+		spin_begin(); \
+		do { \
+			spin_cpu_relax(); \
+		} while (!(cond)); \
+		spin_end(); \
+	} \
+} while (0)
+
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
enum lu_context_state {
	LCS_INITIALIZED = 1,
	LCS_ENTERED,
+	LCS_LEAVING,	/* lu_context_exit() in progress; ->lct_exit may run */
	LCS_LEFT,
	LCS_FINALIZED
};
#define alloc_workqueue(name, flags, max_active) create_workqueue(name)
#endif
+#ifndef smp_store_mb
+#define smp_store_mb(var, value) set_mb(var, value)
+#endif
+
#ifndef READ_ONCE
#define READ_ONCE ACCESS_ONCE
#endif
#include <linux/module.h>
#include <linux/list.h>
+#ifdef HAVE_PROCESSOR_H
+#include <linux/processor.h>
+#else
+#include <libcfs/linux/processor.h>
+#endif
+
#include <libcfs/libcfs.h>
#include <libcfs/libcfs_hash.h> /* hash_long() */
#include <libcfs/linux/linux-mem.h>
up_write(&lu_key_initing);
write_lock(&lu_keys_guard);
- list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
+ list_for_each_entry(ctx, &lu_context_remembered, lc_remember) {
+ spin_until_cond(READ_ONCE(ctx->lc_state) != LCS_LEAVING);
key_fini(ctx, key->lct_index);
+ }
+
write_unlock(&lu_keys_guard);
}
}
{
unsigned int i;
- LINVRNT(ctx->lc_state == LCS_ENTERED);
- ctx->lc_state = LCS_LEFT;
- if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value != NULL) {
- /* could race with key quiescency */
- if (ctx->lc_tags & LCT_REMEMBER)
- read_lock(&lu_keys_guard);
-
+ LINVRNT(ctx->lc_state == LCS_ENTERED);
+ /*
+ * Disable preempt to ensure we get a warning if
+ * any lct_exit ever tries to sleep. That would hurt
+ * lu_context_key_quiesce() which spins waiting for us.
+ * This also ensure we aren't preempted while the state
+ * is LCS_LEAVING, as that too would cause problems for
+ * lu_context_key_quiesce().
+ */
+ preempt_disable();
+ /*
+ * Ensure lu_context_key_quiesce() sees LCS_LEAVING
+ * or we see LCT_QUIESCENT
+ */
+ smp_store_mb(ctx->lc_state, LCS_LEAVING);
+ if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) {
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
- if (ctx->lc_value[i] != NULL) {
- struct lu_context_key *key;
-
- key = lu_keys[i];
- LASSERT(key != NULL);
- if (key->lct_exit != NULL)
- key->lct_exit(ctx,
- key, ctx->lc_value[i]);
- }
- }
+ struct lu_context_key *key;
- if (ctx->lc_tags & LCT_REMEMBER)
- read_unlock(&lu_keys_guard);
+ key = lu_keys[i];
+ if (ctx->lc_value[i] &&
+ !(key->lct_tags & LCT_QUIESCENT) &&
+ key->lct_exit)
+ key->lct_exit(ctx, key, ctx->lc_value[i]);
+ }
}
+
+ smp_store_release(&ctx->lc_state, LCS_LEFT);
+ preempt_enable();
}
EXPORT_SYMBOL(lu_context_exit);