From: NeilBrown
Date: Wed, 8 May 2019 14:17:24 +0000 (-0400)
Subject: LU-11089 obdclass: remove locking from lu_context_exit()
X-Git-Tag: 2.12.54~76
X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F13%2F32713%2F8;p=fs%2Flustre-release.git

LU-11089 obdclass: remove locking from lu_context_exit()

Recent patches suggest that the locking in lu_context_exit() hurts
performance, as the changes they make are intended to improve
performance.  Let's go all the way and remove the locking completely.

The race of interest is between lu_context_exit() finalizing a value
with ->lct_exit, and lu_context_key_quiesce() freeing the value with
key_fini().

If lu_context_key_quiesce() has started, there is no need to finalize
the value - it can just be freed.  So lu_context_exit() is changed to
skip the call to ->lct_exit if LCT_QUIESCENT is set.

If lu_context_exit() has started, lu_context_key_quiesce() must wait
for it to complete - it cannot just skip the freeing.  To allow this
we introduce a new lc_state, LCS_LEAVING, which indicates that
->lct_exit might be called.  Before calling key_fini() on a context,
lu_context_key_quiesce() waits (spinning) for lc_state to move on
from LCS_LEAVING.

Linux-commit: ac3f8fd6e61b245fa9c14e3164203c1211c5ef6b

fix possible hang waiting for LCS_LEAVING

As lu_context_key_quiesce() spins waiting for LCS_LEAVING to change,
it is important that we set and then clear that state within a
non-preemptible region.  If the thread that spins preempts the thread
that sets-and-clears the state while the state is LCS_LEAVING, it can
spin indefinitely, particularly on a single-CPU machine.

Also update the comment to explain this dependency.

Linux-commit: 4859716f66db6989ef4bf52434b5b1d813c6adc1

Change-Id: I92ef27304eab43518fcb216b9c9cb4875cc9b98c
Signed-off-by: NeilBrown
Signed-off-by: James Simmons
Reviewed-on: https://review.whamcloud.com/32713
Tested-by: Jenkins
Reviewed-by: Gu Zheng
Tested-by: Maloo
Reviewed-by: Oleg Drokin
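To make the handshake concrete, here is a minimal standalone sketch of the
protocol described above - illustrative only, not part of the patch:
context_exit() and key_quiesce() are hypothetical stand-ins for
lu_context_exit() and lu_context_key_quiesce(), and plain C11 atomics
stand in for the kernel's smp_store_mb()/smp_store_release().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the states and tag used by the patch. */
enum lc_state { LCS_ENTERED, LCS_LEAVING, LCS_LEFT };

static _Atomic enum lc_state lc_state = LCS_ENTERED;
static atomic_bool lct_quiescent;	/* models the LCT_QUIESCENT tag */

/* Models lu_context_exit(): publish LCS_LEAVING, run the finalizer only
 * if no quiesce has begun, then publish LCS_LEFT.  In the kernel this
 * runs with preemption disabled so the spinner below cannot starve. */
static void context_exit(void)
{
	atomic_store(&lc_state, LCS_LEAVING);	/* smp_store_mb() */
	if (!atomic_load(&lct_quiescent))
		puts("->lct_exit: finalize value");
	atomic_store(&lc_state, LCS_LEFT);	/* smp_store_release() */
}

/* Models lu_context_key_quiesce(): set the tag, wait out any in-flight
 * exit, then free.  The spin replaces the old lu_keys_guard locking. */
static void key_quiesce(void)
{
	atomic_store(&lct_quiescent, true);
	while (atomic_load(&lc_state) == LCS_LEAVING)
		;				/* spin_until_cond() */
	puts("key_fini: free value");
}

int main(void)
{
	context_exit();		/* value finalized ... */
	key_quiesce();		/* ... then freed */
	return 0;
}

Run in the order shown, the value is finalized and then freed; had
key_quiesce() set the tag first, context_exit() would skip the finalizer
and the value would simply be freed.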
---

diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4
index 82a4ebc..12efc01 100644
--- a/libcfs/autoconf/lustre-libcfs.m4
+++ b/libcfs/autoconf/lustre-libcfs.m4
@@ -905,6 +905,17 @@ uuid_t, [
 ]) # LIBCFS_UUID_T
 
 #
+# Kernel version 4.12-rc3 commit fd851a3cdc196bfc1d229b5f22369069af532bf8
+# introduced processor.h
+#
+AC_DEFUN([LIBCFS_HAVE_PROCESSOR_HEADER], [
+LB_CHECK_LINUX_HEADER([linux/processor.h], [
+	AC_DEFINE(HAVE_PROCESSOR_H, 1,
+		[processor.h is present])])
+]) # LIBCFS_HAVE_PROCESSOR_HEADER
+
+
+#
 # LIBCFS_WAIT_QUEUE_ENTRY
 #
 # Kernel version 4.13 ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f
@@ -1071,6 +1082,7 @@ LIBCFS_HOTPLUG_STATE_MACHINE
 LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
 LIBCFS_SCHED_HEADERS
 # 4.12
+LIBCFS_HAVE_PROCESSOR_HEADER
 LIBCFS_UUID_T
 # 4.13
 LIBCFS_WAIT_QUEUE_ENTRY
diff --git a/libcfs/include/libcfs/linux/Makefile.am b/libcfs/include/libcfs/linux/Makefile.am
index 0aaef36..82bc7db 100644
--- a/libcfs/include/libcfs/linux/Makefile.am
+++ b/libcfs/include/libcfs/linux/Makefile.am
@@ -1,2 +1,3 @@
 EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \
-	     linux-list.h linux-hash.h linux-uuid.h linux-crypto.h
+	     linux-list.h linux-hash.h linux-uuid.h linux-crypto.h \
+	     processor.h
diff --git a/libcfs/include/libcfs/linux/processor.h b/libcfs/include/libcfs/linux/processor.h
new file mode 100644
index 0000000..700d01e
--- /dev/null
+++ b/libcfs/include/libcfs/linux/processor.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive including nested uses
+ * of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelines will not cause a bug, but may cause
+ * sub-optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detection of resource owner and decision to spin or sleep or guest-yield
+ * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be
+ * tested within the loop body.
+ */
+#ifndef spin_begin
+#ifdef CONFIG_PPC64
+#define spin_begin()	HMT_low()
+#else
+#define spin_begin()
+#endif /* CONFIG_PPC64 */
+#endif /* spin_begin */
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax()	cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield()	cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#ifdef CONFIG_PPC64
+#define spin_end()	HMT_medium()
+#else
+#define spin_end()
+#endif /* CONFIG_PPC64 */
+#endif /* spin_end */
+
+/*
+ * spin_until_cond can be used to wait for a condition to become true. It
+ * may be expected that the first iteration will be true in the common case
+ * (no spinning), so that callers should not require a first "likely" test
+ * for the uncontended case before using this primitive.
+ *
+ * Usage and implementation guidelines are the same as for the spin_begin
+ * primitives, above.
+ */
+#ifndef spin_until_cond
+#define spin_until_cond(cond)					\
+do {								\
+	if (unlikely(!(cond))) {				\
+		spin_begin();					\
+		do {						\
+			spin_cpu_relax();			\
+		} while (!(cond));				\
+		spin_end();					\
+	}							\
+} while (0)
+
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
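spin_until_cond() folds that begin/relax/end sequence into a single call.
As a usage sketch (not from the patch; wait_until_ready() and its ready
flag are hypothetical), a caller that briefly waits for another CPU to
publish a flag reduces to:

#ifdef HAVE_PROCESSOR_H
#include <linux/processor.h>
#else
#include <libcfs/linux/processor.h>
#endif

/* Hypothetical caller: spin briefly until another CPU sets *ready.
 * READ_ONCE() forces a fresh load on each iteration, exactly as
 * lu_context_key_quiesce() does with ctx->lc_state below. */
static void wait_until_ready(int *ready)
{
	spin_until_cond(READ_ONCE(*ready));
}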
diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h
index 54e0410..0451563 100644
--- a/lustre/include/lu_object.h
+++ b/lustre/include/lu_object.h
@@ -890,6 +890,7 @@ enum lu_xattr_flags {
 enum lu_context_state {
 	LCS_INITIALIZED = 1,
 	LCS_ENTERED,
+	LCS_LEAVING,
 	LCS_LEFT,
 	LCS_FINALIZED
 };
diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h
index f951411..4ea091a 100644
--- a/lustre/include/lustre_compat.h
+++ b/lustre/include/lustre_compat.h
@@ -698,6 +698,10 @@ static inline struct timespec current_time(struct inode *inode)
 #define alloc_workqueue(name, flags, max_active) create_workqueue(name)
 #endif
 
+#ifndef smp_store_mb
+#define smp_store_mb(var, value)	set_mb(var, value)
+#endif
+
 #ifndef READ_ONCE
 #define READ_ONCE ACCESS_ONCE
 #endif
diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c
index 63729fe..9f62693 100644
--- a/lustre/obdclass/lu_object.c
+++ b/lustre/obdclass/lu_object.c
@@ -42,6 +42,12 @@
 #include <linux/module.h>
 #include <linux/list.h>
+#ifdef HAVE_PROCESSOR_H
+#include <linux/processor.h>
+#else
+#include <libcfs/linux/processor.h>
+#endif
+
 #include <libcfs/libcfs.h>
 #include <libcfs/libcfs_hash.h> /* hash_long() */
 #include <obd_class.h>
@@ -1560,8 +1566,11 @@ void lu_context_key_quiesce(struct lu_context_key *key)
 		up_write(&lu_key_initing);
 
 		write_lock(&lu_keys_guard);
-		list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
+		list_for_each_entry(ctx, &lu_context_remembered, lc_remember) {
+			spin_until_cond(READ_ONCE(ctx->lc_state) != LCS_LEAVING);
 			key_fini(ctx, key->lct_index);
+		}
+
 		write_unlock(&lu_keys_guard);
 	}
 }
@@ -1721,28 +1730,35 @@ void lu_context_exit(struct lu_context *ctx)
 {
 	unsigned int i;
 
-	LINVRNT(ctx->lc_state == LCS_ENTERED);
-	ctx->lc_state = LCS_LEFT;
-	if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value != NULL) {
-		/* could race with key quiescency */
-		if (ctx->lc_tags & LCT_REMEMBER)
-			read_lock(&lu_keys_guard);
-
+	LINVRNT(ctx->lc_state == LCS_ENTERED);
+	/*
+	 * Disable preempt to ensure we get a warning if
+	 * any lct_exit ever tries to sleep.  That would hurt
+	 * lu_context_key_quiesce() which spins waiting for us.
+	 * This also ensures we aren't preempted while the state
+	 * is LCS_LEAVING, as that too would cause problems for
+	 * lu_context_key_quiesce().
+	 */
+	preempt_disable();
+	/*
+	 * Ensure lu_context_key_quiesce() sees LCS_LEAVING
+	 * or we see LCT_QUIESCENT.
+	 */
+	smp_store_mb(ctx->lc_state, LCS_LEAVING);
+	if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) {
 		for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
-			if (ctx->lc_value[i] != NULL) {
-				struct lu_context_key *key;
-
-				key = lu_keys[i];
-				LASSERT(key != NULL);
-				if (key->lct_exit != NULL)
-					key->lct_exit(ctx,
-						      key, ctx->lc_value[i]);
-			}
-		}
+			struct lu_context_key *key;
 
-		if (ctx->lc_tags & LCT_REMEMBER)
-			read_unlock(&lu_keys_guard);
+			key = lu_keys[i];
+			if (ctx->lc_value[i] &&
+			    !(key->lct_tags & LCT_QUIESCENT) &&
+			    key->lct_exit)
+				key->lct_exit(ctx, key, ctx->lc_value[i]);
+		}
 	}
+
+	smp_store_release(&ctx->lc_state, LCS_LEFT);
+	preempt_enable();
 }
 EXPORT_SYMBOL(lu_context_exit);
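A closing note on the barrier choice in lu_context_exit(): smp_store_mb()
is a store followed by a full memory barrier, which makes the exit/quiesce
interaction an instance of the classic store-buffering pattern.  The sketch
below is a hypothetical userspace rendering, with C11 seq_cst atomics
standing in for the kernel barriers; it demonstrates the invariant the
patch relies on - at least one side must observe the other's store.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

/* Store-buffering sketch of the LCS_LEAVING / LCT_QUIESCENT pairing,
 * modelled only up to each side's decision point (the later transition
 * to LCS_LEFT is omitted). */
static atomic_int leaving;	/* exit side:    lc_state == LCS_LEAVING */
static atomic_int quiescent;	/* quiesce side: LCT_QUIESCENT tag set   */

static int exit_saw_quiescent;	/* if set, exit skips ->lct_exit      */
static int quiesce_saw_leaving;	/* if set, quiesce spins for LCS_LEFT */

static void *exit_side(void *arg)
{
	atomic_store(&leaving, 1);	/* smp_store_mb() */
	exit_saw_quiescent = atomic_load(&quiescent);
	return NULL;
}

static void *quiesce_side(void *arg)
{
	atomic_store(&quiescent, 1);
	quiesce_saw_leaving = atomic_load(&leaving);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, exit_side, NULL);
	pthread_create(&b, NULL, quiesce_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With full barriers (seq_cst), both loads reading 0 is
	 * impossible: either the exit skips the finalizer, or the
	 * quiesce waits - the unsafe "neither" outcome cannot occur. */
	assert(exit_saw_quiescent || quiesce_saw_leaving);
	return 0;
}

Weaken either store/load pair to relaxed ordering and the assertion can
fail; that is exactly the window the lu_keys_guard locking used to close
and that smp_store_mb() plus spin_until_cond() now close without a lock.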