LU-13258 llite: bind readahead workqueue to CPT set 17/37717/9
author James Simmons <jsimmons@infradead.org>
Wed, 25 Mar 2020 02:47:00 +0000 (22:47 -0400)
committer Oleg Drokin <green@whamcloud.com>
Tue, 7 Apr 2020 17:18:53 +0000 (17:18 +0000)
Lustre uses a workqueue to optimize readahead. This workqueue
can run on any core and can easily be oversubscribed, which has
a negative impact on HPC applications running on a Lustre
client. Limit the number of threads the workqueue can run to
the number of CPUs allocated to Lustre, and only allow those
threads to run on the cores belonging to the CPT set.
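
For readers unfamiliar with the kernel workqueue API, the change in
ll_init_sbi() below boils down to this pattern: allocate the readahead
workqueue as WQ_UNBOUND with a capped max_active, copy the CPT cpumask
into a struct workqueue_attrs, and re-apply the attributes under the
CPU hotplug read lock. The following is only a sketch of that pattern,
not part of the patch; the helper name ll_alloc_cpt_bound_wq and its
error handling are hypothetical, while cfs_cpt_cpumask(), cfs_cpt_tab
and cfs_apply_workqueue_attrs() are the libcfs interfaces the patch
itself uses.

    /* Illustrative sketch only, not part of the patch. */
    #include <linux/cpu.h>
    #include <linux/cpumask.h>
    #include <linux/workqueue.h>
    #include <libcfs/libcfs.h>

    static struct workqueue_struct *ll_alloc_cpt_bound_wq(int max_active)
    {
            struct workqueue_attrs attrs = { };
            struct workqueue_struct *wq;
            cpumask_var_t *mask;

            /* unbound workers may run on any CPU; max_active caps concurrency */
            wq = alloc_workqueue("ll-readahead-wq", WQ_UNBOUND, max_active);
            if (!wq)
                    return NULL;

            /* cpumask covering every CPU partition (CPT) assigned to Lustre */
            mask = cfs_cpt_cpumask(cfs_cpt_tab, CFS_CPT_ANY);
            if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
                    cpumask_copy(attrs.cpumask, *mask);
                    /* hold the CPU hotplug read lock while changing attrs */
                    cpus_read_lock();
                    cfs_apply_workqueue_attrs(wq, &attrs);
                    cpus_read_unlock();
                    free_cpumask_var(attrs.cpumask);
            }
            return wq;
    }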

Change-Id: Ifcc662d52843f5028c34d55695c1d6297e5c00b0
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/37717
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Stephen Champion <stephen.champion@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/linux/linux-cpu.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c

index 0c384d4..a46d3f8 100644
--- a/libcfs/autoconf/lustre-libcfs.m4
+++ b/libcfs/autoconf/lustre-libcfs.m4
@@ -786,6 +786,22 @@ rhashtable_lookup_get_insert_fast, [
 ]) # LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
 
 #
+# Kernel version 4.12-rc2 8f553c498e1772cccb39a114da4a498d22992758
+# provide proper CPU hotplug locking
+#
+AC_DEFUN([LIBCFS_CPUS_READ_LOCK], [
+LB_CHECK_COMPILE([if 'cpus_read_[un]lock' exist],
+cpu_read_lock, [
+       #include <linux/cpu.h>
+],[
+       cpus_read_lock();
+       cpus_read_unlock();
+],[
+       AC_DEFINE(HAVE_CPUS_READ_LOCK, 1, ['cpu_read_lock' exist])
+])
+]) # LIBCFS_CPUS_READ_LOCK
+
+#
 # Kernel version 4.12-rc3 f9727a17db9bab71ddae91f74f11a8a2f9a0ece6
 # renamed uuid_be to uuid_t
 #
@@ -1219,6 +1235,7 @@ LIBCFS_RHT_BUCKET_VAR
 LIBCFS_HAVE_PROCESSOR_HEADER
 LIBCFS_HAVE_WAIT_BIT_HEADER
 LIBCFS_WAIT_QUEUE_TASK_LIST_RENAME
+LIBCFS_CPUS_READ_LOCK
 LIBCFS_UUID_T
 # 4.13
 LIBCFS_WAIT_QUEUE_ENTRY
index ab6b55e..8353f69 100644
--- a/libcfs/include/libcfs/linux/linux-cpu.h
+++ b/libcfs/include/libcfs/linux/linux-cpu.h
 #ifndef __LIBCFS_LINUX_CPU_H__
 #define __LIBCFS_LINUX_CPU_H__
 
+#include <linux/cpu.h>
+
 #ifndef HAVE_TOPOLOGY_SIBLING_CPUMASK
 # define topology_sibling_cpumask(cpu) topology_thread_cpumask(cpu)
 #endif /* HAVE_TOPOLOGY_SIBLING_CPUMASK */
 
+#ifndef HAVE_CPUS_READ_LOCK
+# define cpus_read_lock                get_online_cpus
+# define cpus_read_unlock      put_online_cpus
+#endif
+
 #endif /* __LIBCFS_LINUX_CPU_H__ */
index 9c4983b..5a56084 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -36,6 +36,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/statfs.h>
@@ -49,6 +50,9 @@
 #include <linux/uidgid.h>
 #include <linux/security.h>
 
+#ifndef HAVE_CPUS_READ_LOCK
+#include <libcfs/linux/linux-cpu.h>
+#endif
 #include <uapi/linux/lustre/lustre_ioctl.h>
 #ifdef HAVE_UAPI_LINUX_MOUNT_H
 #include <uapi/linux/mount.h>
@@ -83,6 +87,8 @@ static inline unsigned int ll_get_ra_async_max_active(void)
 
 static struct ll_sb_info *ll_init_sbi(void)
 {
+       struct workqueue_attrs attrs = { };
+       cpumask_var_t *mask;
        struct ll_sb_info *sbi = NULL;
        unsigned long pages;
        unsigned long lru_page_max;
@@ -111,13 +117,23 @@ static struct ll_sb_info *ll_init_sbi(void)
         pages = si.totalram - si.totalhigh;
        lru_page_max = pages / 2;
 
-       sbi->ll_ra_info.ra_async_max_active = 0;
+       sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
        sbi->ll_ra_info.ll_readahead_wq =
                alloc_workqueue("ll-readahead-wq", WQ_UNBOUND,
                                sbi->ll_ra_info.ra_async_max_active);
        if (!sbi->ll_ra_info.ll_readahead_wq)
                GOTO(out_pcc, rc = -ENOMEM);
 
+       mask = cfs_cpt_cpumask(cfs_cpt_tab, CFS_CPT_ANY);
+       if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) {
+               cpumask_copy(attrs.cpumask, *mask);
+               cpus_read_lock();
+               cfs_apply_workqueue_attrs(sbi->ll_ra_info.ll_readahead_wq,
+                                         &attrs);
+               cpus_read_unlock();
+               free_cpumask_var(attrs.cpumask);
+       }
+
        /* initialize ll_cache data */
        sbi->ll_cache = cl_cache_init(lru_page_max);
        if (sbi->ll_cache == NULL)
@@ -129,7 +145,6 @@ static struct ll_sb_info *ll_init_sbi(void)
                                sbi->ll_ra_info.ra_max_pages_per_file;
        sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
        sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
-       sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
        atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
 
         sbi->ll_flags |= LL_SBI_VERBOSE;
index d7d9f97..3e5310d 100644
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -1114,7 +1114,9 @@ static ssize_t max_read_ahead_async_active_store(struct kobject *kobj,
 
        /**
         * It doesn't make any sense to make it exceed what
-        * workqueue could acutally support.
+        * workqueue could actually support. This can easily
+        * oversubscribe the cores, but Lustre internally
+        * throttles to avoid those impacts.
         */
        if (val > WQ_UNBOUND_MAX_ACTIVE) {
                CERROR("%s: cannot set max_read_ahead_async_active=%u larger than %u\n",