diff --git a/libcfs/include/libcfs/libcfs_cpu.h b/libcfs/include/libcfs/libcfs_cpu.h
index ef15297..fb268a8 100644
--- a/libcfs/include/libcfs/libcfs_cpu.h
+++ b/libcfs/include/libcfs/libcfs_cpu.h
@@ -13,16 +13,12 @@
  * General Public License version 2 for more details (a copy is included
  * in the LICENSE file that accompanied this code).
  *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA
- *
  * GPL HEADER END
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2013, Intel Corporation.
+ *
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -46,16 +42,16 @@
  *
  * Example: if there are 8 cores on the system, while creating a CPT
  * with cpu_npartitions=4:
- *      core[0, 1] = partition[0], core[2, 3] = partition[1]
- *      core[4, 5] = partition[2], core[6, 7] = partition[3]
+ *	core[0, 1] = partition[0], core[2, 3] = partition[1]
+ *	core[4, 5] = partition[2], core[6, 7] = partition[3]
  *
  * cpu_npartitions=1:
- *      core[0, 1, ... 7] = partition[0]
+ *	core[0, 1, ... 7] = partition[0]
  *
  * . User can also specify CPU partitions by string pattern
  *
  * Examples: cpu_partitions="0[0,1], 1[2,3]"
- *           cpu_partitions="N 0[0-3], 1[4-8]"
+ *	     cpu_partitions="N 0[0-3], 1[4-8]"
  *
  * The first character "N" means the following numbers are NUMA node IDs
  *
@@ -75,24 +71,56 @@
 #ifndef __LIBCFS_CPU_H__
 #define __LIBCFS_CPU_H__
 
-#ifndef HAVE_LIBCFS_CPT
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/topology.h>
+#include <linux/vmalloc.h>
 
-typedef unsigned long cpumask_t;
-typedef unsigned long nodemask_t;
+#include <libcfs/libcfs.h>
 
+#ifdef CONFIG_SMP
+
+/** virtual processing unit */
+struct cfs_cpu_partition {
+	/* CPUs mask for this partition */
+	cpumask_t *cpt_cpumask;
+	/* nodes mask for this partition */
+	nodemask_t *cpt_nodemask;
+	/* NUMA distance between CPTs */
+	unsigned int *cpt_distance;
+	/* spread rotor for NUMA allocator */
+	int cpt_spread_rotor;
+	/* NUMA node if cpt_nodemask is empty */
+	int cpt_node;
+};
+#endif /* CONFIG_SMP */
+
+/** descriptor for CPU partitions */
 struct cfs_cpt_table {
+#ifdef CONFIG_SMP
+	/* spread rotor for NUMA allocator */
+	int ctb_spread_rotor;
+	/* maximum NUMA distance between all nodes in table */
+	unsigned int ctb_distance;
+	/* partitions tables */
+	struct cfs_cpu_partition *ctb_parts;
+	/* shadow HW CPU to CPU partition ID */
+	int *ctb_cpu2cpt;
+	/* shadow HW node to CPU partition ID */
+	int *ctb_node2cpt;
 	/* # of CPU partitions */
-	int ctb_nparts;
-	/* cpu mask */
-	cpumask_t ctb_mask;
-	/* node mask */
-	nodemask_t ctb_nodemask;
-	/* version */
-	__u64 ctb_version;
+	int ctb_nparts;
+	/* all nodes in this partition table */
+	nodemask_t *ctb_nodemask;
+#else
+	nodemask_t ctb_nodemask;
+#endif /* CONFIG_SMP */
+	/* all cpus in this partition table */
+	cpumask_t *ctb_cpumask;
 };
 
-#endif /* !HAVE_LIBCFS_CPT */
-
 /* any CPU partition */
 #define CFS_CPT_ANY (-1)
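
For illustration only (not part of the patch): the default mapping described in
the header comment above splits cores evenly across partitions, so with 8 cores
and cpu_npartitions=4 each partition owns two consecutive cores. A minimal
sketch of that arithmetic, with a hypothetical helper name:

	/* hypothetical helper mirroring the documented default mapping */
	static int core_to_cpt(int core, int ncores, int nparts)
	{
		/* 8 cores, 4 partitions: core 0,1 -> 0; core 2,3 -> 1;
		 * core 4,5 -> 2; core 6,7 -> 3
		 */
		return core * nparts / ncores;
	}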
@@ -105,18 +133,21 @@ void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
 /**
  * create a cfs_cpt_table with \a ncpt number of partitions
  */
-struct cfs_cpt_table *
-cfs_cpt_table_alloc(unsigned int ncpt);
+struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt);
 /**
  * print string information of cpt-table
  */
 int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
 /**
+ * print distance information of cpt-table
+ */
+int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
  * return total number of CPU partitions in \a cptab
  */
-int
-cfs_cpt_number(struct cfs_cpt_table *cptab);
+int cfs_cpt_number(struct cfs_cpt_table *cptab);
 /**
- * return number of HW cores or hypter-threadings in a CPU partition \a cpt
+ * return number of HW cores or hyper-threads in a CPU partition \a cpt
  */
 int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
 /**
@@ -140,11 +171,19 @@ int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
  */
 int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
 /**
+ * shadow HW node ID \a node to CPU-partition ID by \a cptab
+ */
+int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node);
+/**
+ * NUMA distance between \a cpt1 and \a cpt2 in \a cptab
+ */
+unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2);
+/**
  * bind current thread on a CPU-partition \a cpt of \a cptab
  */
 int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
 /**
- * add \a cpu to CPU partion @cpt of \a cptab, return 1 for success,
+ * add \a cpu to CPU partition \a cpt of \a cptab, return 1 for success,
  * otherwise 0 is returned
  */
 int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
@@ -156,13 +195,13 @@ void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
  * add all cpus in \a mask to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
  */
-int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
-			int cpt, cpumask_t *mask);
+int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt,
+			const cpumask_t *mask);
 /**
  * remove all cpus in \a mask from CPU partition \a cpt
  */
-void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
-			   int cpt, cpumask_t *mask);
+void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt,
+			   const cpumask_t *mask);
 /**
  * add all cpus in NUMA node \a node to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
@@ -177,37 +216,158 @@ void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
  * add all cpus in node mask \a mask to CPU partition \a cpt
  * return 1 if successfully set all CPUs, otherwise return 0
  */
-int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
-			 int cpt, nodemask_t *mask);
+int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt,
+			 const nodemask_t *mask);
 /**
  * remove all cpus in node mask \a mask from CPU partition \a cpt
  */
-void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
-			    int cpt, nodemask_t *mask);
-/**
- * unset all cpus for CPU partition \a cpt
- */
-void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
+void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt,
+			    const nodemask_t *mask);
 /**
  * convert partition id \a cpt to NUMA node id; if there is more than one
  * node in this partition, it may return a different node id each time.
  */
 int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
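
A minimal usage sketch of the query and binding API above (annotation, not part
of the patch; the helper name is hypothetical, and a 0-on-success return from
cfs_cpt_bind() is assumed):

	/* pin the calling thread to partition @cpt, then verify the mapping */
	static int bind_to_partition(struct cfs_cpt_table *cptab, int cpt)
	{
		int rc = cfs_cpt_bind(cptab, cpt);

		if (rc != 0)
			return rc;
		/* without remapping, the current CPU should belong to @cpt */
		return cfs_cpt_current(cptab, 0) == cpt ? 0 : -EINVAL;
	}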
+/*
+ * allocate per-cpu-partition data, returned value is an array of pointers,
+ * variable can be indexed by CPU ID.
+ *	cptab != NULL: size of array is number of CPU partitions
+ *	cptab == NULL: size of array is number of HW cores
+ */
+void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
+/*
+ * destroy per-cpu-partition variable
+ */
+void cfs_percpt_free(void *vars);
+int cfs_percpt_number(void *vars);
+
+#define cfs_percpt_for_each(var, i, vars)		\
+	for (i = 0; i < cfs_percpt_number(vars) &&	\
+		((var) = (vars)[i]) != NULL; i++)
+
+/*
+ * percpu partition lock
+ *
+ * There are some use-cases like this in Lustre:
+ * . each CPU partition has its own private data which is frequently changed,
+ *   and mostly by the local CPU partition.
+ * . all CPU partitions share some global data, these data are rarely changed.
+ *
+ * LNet is a typical example.
+ * The CPU partition lock is designed for this kind of use-case:
+ * . each CPU partition has its own private lock
+ * . change on private data just needs to take the private lock
+ * . read on shared data just needs to take _any_ of the private locks
+ * . change on shared data needs to take _all_ private locks,
+ *   which is slow and should be really rare.
+ */
+enum {
+	CFS_PERCPT_LOCK_EX = -1,	/* negative */
+};
+
+struct cfs_percpt_lock {
+	/* cpu-partition-table for this lock */
+	struct cfs_cpt_table *pcl_cptab;
+	/* exclusively locked */
+	unsigned int pcl_locked;
+	/* private lock table */
+	spinlock_t **pcl_locks;
+};
+
+/* return number of private locks */
+#define cfs_percpt_lock_num(pcl) cfs_cpt_number(pcl->pcl_cptab)
+
+/*
+ * create a cpu-partition lock based on CPU partition table \a cptab;
+ * \a keys is an optional array of lockdep class keys, one per private lock
+ */
+struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
+					       struct lock_class_key *keys);
+/* destroy a cpu-partition lock */
+void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
+
+/* lock private lock \a index of \a pcl */
+void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
+
+/* unlock private lock \a index of \a pcl */
+void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
+
+#define CFS_PERCPT_LOCK_KEYS	256
+
+/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
+#define cfs_percpt_lock_alloc(cptab)					\
+({									\
+	static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS];	\
+	struct cfs_percpt_lock *___lk;					\
+									\
+	if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS)		\
+		___lk = cfs_percpt_lock_create(cptab, NULL);		\
+	else								\
+		___lk = cfs_percpt_lock_create(cptab, ___keys);		\
+	___lk;								\
+})
+
+/**
+ * allocate \a nr_bytes of physical memory from a contiguous region with the
+ * properties of \a flags which are bound to the partition id \a cpt. This
+ * function should only be used for the case when only a few pages of memory
+ * are needed.
+ */
+static inline void *
+cfs_cpt_malloc(struct cfs_cpt_table *cptab, int cpt, size_t nr_bytes,
+	       gfp_t flags)
+{
+	return kmalloc_node(nr_bytes, flags,
+			    cfs_cpt_spread_node(cptab, cpt));
+}
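
A sketch of how the per-partition data and lock primitives above combine
(annotation, not part of the patch; the payload struct and function are
hypothetical, and error unwinding is trimmed):

	struct my_counter {		/* hypothetical payload */
		__u64 mc_count;
	};

	static int counters_demo(struct cfs_cpt_table *cptab)
	{
		struct cfs_percpt_lock *plock;
		struct my_counter **cnts;
		int cpt;

		cnts = cfs_percpt_alloc(cptab, sizeof(**cnts));
		plock = cfs_percpt_lock_alloc(cptab);
		if (cnts == NULL || plock == NULL)
			return -ENOMEM;

		/* frequent path: touch only the local partition's slot */
		cpt = cfs_cpt_current(cptab, 1);
		cfs_percpt_lock(plock, cpt);
		cnts[cpt]->mc_count++;
		cfs_percpt_unlock(plock, cpt);

		/* rare path: CFS_PERCPT_LOCK_EX takes every private lock */
		cfs_percpt_lock(plock, CFS_PERCPT_LOCK_EX);
		/* ... change data shared by all partitions ... */
		cfs_percpt_unlock(plock, CFS_PERCPT_LOCK_EX);

		cfs_percpt_lock_free(plock);
		cfs_percpt_free(cnts);
		return 0;
	}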
+/**
+ * allocate \a nr_bytes of virtually contiguous memory that is bound to the
+ * partition id \a cpt.
+ */
+static inline void *
+cfs_cpt_vzalloc(struct cfs_cpt_table *cptab, int cpt, size_t nr_bytes)
+{
+	/* vzalloc_node() sets __GFP_FS by default, and no currently
+	 * exported kernel entry-point allows both a NUMA node specification
+	 * and a custom allocation flags mask. This may be an issue since
+	 * __GFP_FS usage can cause deadlocks in our code: memory reclaim
+	 * started within the context of a thread that is already doing FS
+	 * operations may re-enter the filesystem and attempt conflicting
+	 * FS operations.
+	 */
+	return vzalloc_node(nr_bytes, cfs_cpt_spread_node(cptab, cpt));
+}
+
+/**
+ * allocate a single page of memory with the properties of \a flags, where
+ * that page is bound to the partition id \a cpt.
+ */
+static inline struct page *
+cfs_page_cpt_alloc(struct cfs_cpt_table *cptab, int cpt, gfp_t flags)
+{
+	return alloc_pages_node(cfs_cpt_spread_node(cptab, cpt), flags, 0);
+}
+
+/**
+ * allocate a chunk of memory from a memory pool that is bound to the
+ * partition id \a cpt with the properties of \a flags.
+ */
+static inline void *
+cfs_mem_cache_cpt_alloc(struct kmem_cache *cachep, struct cfs_cpt_table *cptab,
+			int cpt, gfp_t flags)
+{
+	return kmem_cache_alloc_node(cachep, flags,
+				     cfs_cpt_spread_node(cptab, cpt));
+}
+
 /**
  * iterate over all CPU partitions in \a cptab
  */
 #define cfs_cpt_for_each(i, cptab)	\
 	for (i = 0; i < cfs_cpt_number(cptab); i++)
 
-#ifndef __read_mostly
-# define __read_mostly
-#endif
-
-#ifndef ____cacheline_aligned
-#define ____cacheline_aligned
-#endif
-
 int cfs_cpu_init(void);
 void cfs_cpu_fini(void);
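
A closing sketch (annotation, not part of the patch) combining the NUMA-aware
allocators with cfs_cpt_for_each; the helper name and the GFP_KERNEL choice are
illustrative:

	/* allocate one partition-local page per CPT; caller unwinds on error */
	static int alloc_cpt_pages(struct cfs_cpt_table *cptab,
				   struct page **pages)
	{
		int i;

		cfs_cpt_for_each(i, cptab) {
			pages[i] = cfs_page_cpt_alloc(cptab, i, GFP_KERNEL);
			if (pages[i] == NULL)
				return -ENOMEM;
		}
		return 0;
	}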