From 21ccfd99de46e1ca1572b16db0d3cef9cd805f4f Mon Sep 17 00:00:00 2001
From: James Simmons
Date: Tue, 24 Mar 2020 22:47:00 -0400
Subject: [PATCH] LU-13258 llite: bind readahead workqueue to CPT set

A workqueue is used by Lustre to optimize readahead. This workqueue
can run on any core and can easily be oversubscribed, which has a
negative impact on HPC applications running on a Lustre client. Limit
the number of threads the workqueue can run to the number of CPUs
allocated to Lustre, and only allow those threads to run on the cores
belonging to the CPT set.

Change-Id: Ifcc662d52843f5028c34d55695c1d6297e5c00b0
Signed-off-by: James Simmons
Reviewed-on: https://review.whamcloud.com/37717
Reviewed-by: Wang Shilong
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Shaun Tancheff
Reviewed-by: Stephen Champion
Reviewed-by: Oleg Drokin
---
 libcfs/autoconf/lustre-libcfs.m4        | 17 +++++++++++++++++
 libcfs/include/libcfs/linux/linux-cpu.h |  7 +++++++
 lustre/llite/llite_lib.c                | 19 +++++++++++++++++--
 lustre/llite/lproc_llite.c              |  4 +++-
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4
index 0c384d41..a46d3f8 100644
--- a/libcfs/autoconf/lustre-libcfs.m4
+++ b/libcfs/autoconf/lustre-libcfs.m4
@@ -786,6 +786,22 @@ rhashtable_lookup_get_insert_fast, [
 ]) # LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
 
 #
+# Kernel version 4.12-rc2 8f553c498e1772cccb39a114da4a498d22992758
+# provide proper CPU hotplug locking
+#
+AC_DEFUN([LIBCFS_CPUS_READ_LOCK], [
+LB_CHECK_COMPILE([if 'cpus_read_[un]lock' exist],
+cpu_read_lock, [
+	#include <linux/cpu.h>
+],[
+	cpus_read_lock();
+	cpus_read_unlock();
+],[
+	AC_DEFINE(HAVE_CPUS_READ_LOCK, 1, ['cpu_read_lock' exist])
+])
+]) # LIBCFS_CPUS_READ_LOCK
+
+#
 # Kernel version 4.12-rc3 f9727a17db9bab71ddae91f74f11a8a2f9a0ece6
 # renamed uuid_be to uuid_t
 #
@@ -1219,6 +1235,7 @@ LIBCFS_RHT_BUCKET_VAR
 LIBCFS_HAVE_PROCESSOR_HEADER
 LIBCFS_HAVE_WAIT_BIT_HEADER
 LIBCFS_WAIT_QUEUE_TASK_LIST_RENAME
+LIBCFS_CPUS_READ_LOCK
 LIBCFS_UUID_T
 # 4.13
 LIBCFS_WAIT_QUEUE_ENTRY
diff --git a/libcfs/include/libcfs/linux/linux-cpu.h b/libcfs/include/libcfs/linux/linux-cpu.h
index ab6b55e..8353f69 100644
--- a/libcfs/include/libcfs/linux/linux-cpu.h
+++ b/libcfs/include/libcfs/linux/linux-cpu.h
@@ -39,8 +39,15 @@
 #ifndef __LIBCFS_LINUX_CPU_H__
 #define __LIBCFS_LINUX_CPU_H__
 
+#include <linux/cpu.h>
+
 #ifndef HAVE_TOPOLOGY_SIBLING_CPUMASK
 # define topology_sibling_cpumask(cpu)	topology_thread_cpumask(cpu)
 #endif /* HAVE_TOPOLOGY_SIBLING_CPUMASK */
 
+#ifndef HAVE_CPUS_READ_LOCK
+# define cpus_read_lock		get_online_cpus
+# define cpus_read_unlock	put_online_cpus
+#endif
+
 #endif /* __LIBCFS_LINUX_CPU_H__ */
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index 9c4983b..5a56084 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -36,6 +36,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
+#include <linux/cpu.h>
 #include
 #include
 #include
@@ -49,6 +50,9 @@
 #include
 #include
 
+#ifndef HAVE_CPUS_READ_LOCK
+#include <libcfs/linux/linux-cpu.h>
+#endif
 #include
 #ifdef HAVE_UAPI_LINUX_MOUNT_H
 #include <uapi/linux/mount.h>
@@ -83,6 +87,8 @@ static inline unsigned int ll_get_ra_async_max_active(void)
 
 static struct ll_sb_info *ll_init_sbi(void)
 {
+	struct workqueue_attrs attrs = { };
+	cpumask_var_t *mask;
 	struct ll_sb_info *sbi = NULL;
 	unsigned long pages;
 	unsigned long lru_page_max;
@@ -111,13 +117,23 @@ static struct ll_sb_info *ll_init_sbi(void)
 	pages = si.totalram - si.totalhigh;
 	lru_page_max = pages / 2;
 
-	sbi->ll_ra_info.ra_async_max_active = 0;
+	sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
 	sbi->ll_ra_info.ll_readahead_wq =
 		alloc_workqueue("ll-readahead-wq", WQ_UNBOUND,
 				sbi->ll_ra_info.ra_async_max_active);
 	if (!sbi->ll_ra_info.ll_readahead_wq)
 		GOTO(out_pcc, rc = -ENOMEM);
 
+	mask = cfs_cpt_cpumask(cfs_cpt_tab, CFS_CPT_ANY);
+	if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
+		cpumask_copy(attrs.cpumask, *mask);
+		cpus_read_lock();
+		cfs_apply_workqueue_attrs(sbi->ll_ra_info.ll_readahead_wq,
+					  &attrs);
+		cpus_read_unlock();
+		free_cpumask_var(attrs.cpumask);
+	}
+
 	/* initialize ll_cache data */
 	sbi->ll_cache = cl_cache_init(lru_page_max);
 	if (sbi->ll_cache == NULL)
@@ -129,7 +145,6 @@ static struct ll_sb_info *ll_init_sbi(void)
 		 sbi->ll_ra_info.ra_max_pages_per_file;
 	sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
 	sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
-	sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
 	atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
 
 	sbi->ll_flags |= LL_SBI_VERBOSE;
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c
index d7d9f97..3e5310d 100644
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -1114,7 +1114,9 @@ static ssize_t max_read_ahead_async_active_store(struct kobject *kobj,
 
 	/**
 	 * It doesn't make any sense to make it exceed what
-	 * workqueue could acutally support.
+	 * workqueue could actually support. This can easily
+	 * oversubscribe the cores but Lustre internally
+	 * throttles to avoid those impacts.
 	 */
 	if (val > WQ_UNBOUND_MAX_ACTIVE) {
 		CERROR("%s: cannot set max_read_ahead_async_active=%u larger than %u\n",
-- 
1.8.3.1
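
Note on the mechanism used above: the patch relies on the kernel's
unbound-workqueue attribute interface, namely copying the desired CPU
mask into a struct workqueue_attrs and applying it while holding the
CPU hotplug read lock. The sketch below shows that pattern outside of
Lustre and is not part of the patch: the function and workqueue names
are invented, and it calls apply_workqueue_attrs() directly, whose
visibility to modules varies by kernel version, whereas the patch goes
through the libcfs wrapper cfs_apply_workqueue_attrs() and gets the
mask from the CPT table via cfs_cpt_cpumask(cfs_cpt_tab, CFS_CPT_ANY).

/*
 * Illustrative sketch only: bind an unbound workqueue to a
 * caller-supplied CPU mask.  Assumes a >= 4.12 kernel where
 * cpus_read_lock()/cpus_read_unlock() exist and
 * apply_workqueue_attrs() is callable from this context.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct workqueue_struct *
example_alloc_bound_wq(const struct cpumask *mask, int max_active)
{
	struct workqueue_attrs attrs = { };	/* nice = 0, default flags */
	struct workqueue_struct *wq;

	/* max_active caps concurrent work items, like ra_async_max_active */
	wq = alloc_workqueue("example-bound-wq", WQ_UNBOUND, max_active);
	if (!wq)
		return NULL;

	/* attrs.cpumask is a cpumask_var_t and must be allocated first */
	if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
		cpumask_copy(attrs.cpumask, mask);

		/* take the hotplug read lock across the attribute update,
		 * mirroring what ll_init_sbi() does in this patch */
		cpus_read_lock();
		if (apply_workqueue_attrs(wq, &attrs))
			pr_warn("example: failed to bind workqueue to mask\n");
		cpus_read_unlock();

		free_cpumask_var(attrs.cpumask);
	}

	return wq;
}

In ll_init_sbi() the equivalent mask is the CPT-wide cpumask returned
by cfs_cpt_cpumask(), so the readahead worker threads stay on the CPUs
that were configured for Lustre.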