A workqueue is used by Lustre to optimize readahead. This
workqueue can run on any core and can easily be oversubscribed, which
has a negative impact on HPC applications running on a
Lustre client. Limit the number of threads the workqueue can run
to the size of the CPU set allocated for Lustre and only allow those
threads to run on the cores belonging to the CPT set.
Change-Id: Ifcc662d52843f5028c34d55695c1d6297e5c00b0
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/37717
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Stephen Champion <stephen.champion@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
]) # LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
#
]) # LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
#
+# Kernel version 4.12-rc2 8f553c498e1772cccb39a114da4a498d22992758
+# provides proper CPU hotplug locking via cpus_read_lock() and
+# cpus_read_unlock(). HAVE_CPUS_READ_LOCK is defined when a test
+# program calling both compiles against <linux/cpu.h>.
+#
+AC_DEFUN([LIBCFS_CPUS_READ_LOCK], [
+LB_CHECK_COMPILE([if 'cpus_read_lock' and 'cpus_read_unlock' exist],
+cpu_read_lock, [
+ #include <linux/cpu.h>
+],[
+ cpus_read_lock();
+ cpus_read_unlock();
+],[
+ AC_DEFINE(HAVE_CPUS_READ_LOCK, 1,
+ ['cpus_read_lock' and 'cpus_read_unlock' exist])
+])
+]) # LIBCFS_CPUS_READ_LOCK
+
+#
# Kernel version 4.12-rc3 f9727a17db9bab71ddae91f74f11a8a2f9a0ece6
# renamed uuid_be to uuid_t
#
# Kernel version 4.12-rc3 f9727a17db9bab71ddae91f74f11a8a2f9a0ece6
# renamed uuid_be to uuid_t
#
LIBCFS_HAVE_PROCESSOR_HEADER
LIBCFS_HAVE_WAIT_BIT_HEADER
LIBCFS_WAIT_QUEUE_TASK_LIST_RENAME
LIBCFS_HAVE_PROCESSOR_HEADER
LIBCFS_HAVE_WAIT_BIT_HEADER
LIBCFS_WAIT_QUEUE_TASK_LIST_RENAME
LIBCFS_UUID_T
# 4.13
LIBCFS_WAIT_QUEUE_ENTRY
LIBCFS_UUID_T
# 4.13
LIBCFS_WAIT_QUEUE_ENTRY
#ifndef __LIBCFS_LINUX_CPU_H__
#define __LIBCFS_LINUX_CPU_H__
#ifndef __LIBCFS_LINUX_CPU_H__
#define __LIBCFS_LINUX_CPU_H__
+#include <linux/cpu.h>
+
#ifndef HAVE_TOPOLOGY_SIBLING_CPUMASK
# define topology_sibling_cpumask(cpu) topology_thread_cpumask(cpu)
#endif /* HAVE_TOPOLOGY_SIBLING_CPUMASK */
#ifndef HAVE_TOPOLOGY_SIBLING_CPUMASK
# define topology_sibling_cpumask(cpu) topology_thread_cpumask(cpu)
#endif /* HAVE_TOPOLOGY_SIBLING_CPUMASK */
+#ifndef HAVE_CPUS_READ_LOCK /* alias to get/put_online_cpus() instead */
+# define cpus_read_lock get_online_cpus
+# define cpus_read_unlock put_online_cpus
+#endif /* HAVE_CPUS_READ_LOCK */
+
#endif /* __LIBCFS_LINUX_CPU_H__ */
#endif /* __LIBCFS_LINUX_CPU_H__ */
#define DEBUG_SUBSYSTEM S_LLITE
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/module.h>
#include <linux/random.h>
#include <linux/statfs.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/statfs.h>
#include <linux/uidgid.h>
#include <linux/security.h>
#include <linux/uidgid.h>
#include <linux/security.h>
+#ifndef HAVE_CPUS_READ_LOCK
+#include <libcfs/linux/linux-cpu.h>
+#endif
#include <uapi/linux/lustre/lustre_ioctl.h>
#ifdef HAVE_UAPI_LINUX_MOUNT_H
#include <uapi/linux/mount.h>
#include <uapi/linux/lustre/lustre_ioctl.h>
#ifdef HAVE_UAPI_LINUX_MOUNT_H
#include <uapi/linux/mount.h>
static struct ll_sb_info *ll_init_sbi(void)
{
static struct ll_sb_info *ll_init_sbi(void)
{
+ struct workqueue_attrs attrs = { };
+ cpumask_var_t *mask;
struct ll_sb_info *sbi = NULL;
unsigned long pages;
unsigned long lru_page_max;
struct ll_sb_info *sbi = NULL;
unsigned long pages;
unsigned long lru_page_max;
pages = si.totalram - si.totalhigh;
lru_page_max = pages / 2;
pages = si.totalram - si.totalhigh;
lru_page_max = pages / 2;
- sbi->ll_ra_info.ra_async_max_active = 0;
+ sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
sbi->ll_ra_info.ll_readahead_wq =
alloc_workqueue("ll-readahead-wq", WQ_UNBOUND,
sbi->ll_ra_info.ra_async_max_active);
if (!sbi->ll_ra_info.ll_readahead_wq)
GOTO(out_pcc, rc = -ENOMEM);
sbi->ll_ra_info.ll_readahead_wq =
alloc_workqueue("ll-readahead-wq", WQ_UNBOUND,
sbi->ll_ra_info.ra_async_max_active);
if (!sbi->ll_ra_info.ll_readahead_wq)
GOTO(out_pcc, rc = -ENOMEM);
+ mask = cfs_cpt_cpumask(cfs_cpt_tab, CFS_CPT_ANY);
+ if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
+ cpumask_copy(attrs.cpumask, *mask);
+ cpus_read_lock();
+ cfs_apply_workqueue_attrs(sbi->ll_ra_info.ll_readahead_wq,
+ &attrs);
+ cpus_read_unlock();
+ free_cpumask_var(attrs.cpumask);
+ }
+
/* initialize ll_cache data */
sbi->ll_cache = cl_cache_init(lru_page_max);
if (sbi->ll_cache == NULL)
/* initialize ll_cache data */
sbi->ll_cache = cl_cache_init(lru_page_max);
if (sbi->ll_cache == NULL)
sbi->ll_ra_info.ra_max_pages_per_file;
sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
sbi->ll_ra_info.ra_max_pages_per_file;
sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
- sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
sbi->ll_flags |= LL_SBI_VERBOSE;
atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
sbi->ll_flags |= LL_SBI_VERBOSE;
/**
* It doesn't make any sense to make it exceed what
/**
* It doesn't make any sense to make it exceed what
- * workqueue could acutally support.
+ * workqueue could actually support. This can easily
+ * oversubscribe the cores but Lustre internally
+ * throttles to avoid those impacts.
*/
if (val > WQ_UNBOUND_MAX_ACTIVE) {
CERROR("%s: cannot set max_read_ahead_async_active=%u larger than %u\n",
*/
if (val > WQ_UNBOUND_MAX_ACTIVE) {
CERROR("%s: cannot set max_read_ahead_async_active=%u larger than %u\n",