X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=libcfs%2Finclude%2Flibcfs%2Flibcfs_cpu.h;h=380be11f8587c48a1e2a0a8f543744232168bee2;hb=4aed5234f1123efc06c5c7e702085a461a8aae12;hp=736f2b273c81389bb7980916095fba0f458836fd;hpb=617e8e1229637908d4cce6725878dd5668960420;p=fs%2Flustre-release.git

diff --git a/libcfs/include/libcfs/libcfs_cpu.h b/libcfs/include/libcfs/libcfs_cpu.h
index 736f2b2..380be11 100644
--- a/libcfs/include/libcfs/libcfs_cpu.h
+++ b/libcfs/include/libcfs/libcfs_cpu.h
@@ -22,7 +22,8 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ *
+ * Copyright (c) 2012, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -52,12 +53,12 @@
  *	cpu_npartitions=1:
  *	   core[0, 1, ... 7] = partition[0]
  *
- * . User can also specifiy CPU partitions by string pattern
+ * . User can also specify CPU partitions by string pattern
  *
  *	Examples: cpu_partitions="0[0,1], 1[2,3]"
  *		  cpu_partitions="N 0[0-3], 1[4-8]"
  *
- *	The first charactor "N" means following numbers are numa ID
+ *	The first character "N" means the following numbers are NUMA IDs
  *
  * . NUMA allocators, CPU affinity threads are built over CPU partitions,
  *   instead of HW CPUs or HW nodes.
@@ -67,7 +68,7 @@
  *   configured by cpu_npartitions of the global cfs_cpt_table
  *
  * . If cpu_npartitions=1(all CPUs in one pool), lustre should work the
- *   same way as 2.2 or earlier verison
+ *   same way as 2.2 or earlier versions
  *
  * Author: liang@whamcloud.com
  */
@@ -77,8 +78,16 @@
 
 #ifndef HAVE_LIBCFS_CPT
 
-typedef unsigned long cpumask_t;
-typedef unsigned long nodemask_t;
+#ifndef __KERNEL__
+typedef struct nodemask { DECLARE_BITMAP(bits, 1); } nodemask_t;
+typedef struct cpumask { DECLARE_BITMAP(bits, 1); } cpumask_t;
+
+#define node_set(node, dst) __node_set((node), &(dst))
+static __always_inline void __node_set(int node, nodemask_t *dstp)
+{
+	set_bit(node, dstp->bits);
+}
+#endif /* __KERNEL__ */
 
 struct cfs_cpt_table {
 	/* # of CPU partitions */
@@ -195,6 +204,143 @@ void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
 int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
 
 /**
+ * return number of HTs in the same core as \a cpu
+ */
+int cfs_cpu_ht_nsiblings(int cpu);
+
+/*
+ * allocate per-cpu-partition data; the returned value is an array of
+ * pointers, which can be indexed by CPU partition ID.
+ *	cptab != NULL: size of array is number of CPU partitions
+ *	cptab == NULL: size of array is number of HW cores
+ */
+void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
+/*
+ * destroy per-cpu-partition variable
+ */
+void cfs_percpt_free(void *vars);
+int cfs_percpt_number(void *vars);
+
+#define cfs_percpt_for_each(var, i, vars)		\
+	for (i = 0; i < cfs_percpt_number(vars) &&	\
+		((var) = (vars)[i]) != NULL; i++)
+
+/*
+ * percpu partition lock
+ *
+ * There are some use-cases like this in Lustre:
+ * . each CPU partition has its own private data which is frequently changed,
+ *   and mostly by the local CPU partition.
+ * . all CPU partitions share some global data, and these data are rarely changed.
+ *
+ * LNet is a typical example.
+ * The CPU partition lock is designed for this kind of use-case:
+ * . each CPU partition has its own private lock
+ * . a change to private data just needs to take the private lock
+ * . a read of shared data just needs to take _any_ of the private locks
+ * . a change to shared data needs to take _all_ private locks,
+ *   which is slow and should be really rare.
+ */
+enum {
+	CFS_PERCPT_LOCK_EX = -1,	/* negative */
+};
+
+struct cfs_percpt_lock {
+	/* cpu-partition-table for this lock */
+	struct cfs_cpt_table	 *pcl_cptab;
+	/* exclusively locked */
+	unsigned int		  pcl_locked;
+	/* private lock table */
+	spinlock_t		**pcl_locks;
+};
+
+/* return number of private locks */
+#define cfs_percpt_lock_num(pcl)	cfs_cpt_number(pcl->pcl_cptab)
+
+/*
+ * create a cpu-partition lock based on CPU partition table \a cptab;
+ * each private lock is initialized with its lock_class_key from \a keys
+ */
+struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
+					       struct lock_class_key *keys);
+/* destroy a cpu-partition lock */
+void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
+
+/* lock private lock \a index of \a pcl */
+void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
+/* unlock private lock \a index of \a pcl */
+void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
+
+#define CFS_PERCPT_LOCK_KEYS	256
+
+/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
+#define cfs_percpt_lock_alloc(cptab)					\
+({									\
+	static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS];	\
+	struct cfs_percpt_lock *___lk;					\
+									\
+	if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS)		\
+		___lk = cfs_percpt_lock_create(cptab, NULL);		\
+	else								\
+		___lk = cfs_percpt_lock_create(cptab, ___keys);		\
+	___lk;								\
+})
+
+/**
+ * allocate \a nr_bytes of physically contiguous memory with the
+ * properties of \a flags, bound to the partition id \a cpt. This
+ * function should only be used for cases when only a few pages of
+ * memory are needed.
+ */
+static inline void *
+cfs_cpt_malloc(struct cfs_cpt_table *cptab, int cpt, size_t nr_bytes,
+	       gfp_t flags)
+{
+	return kmalloc_node(nr_bytes, flags,
+			    cfs_cpt_spread_node(cptab, cpt));
+}
+
+/**
+ * allocate \a nr_bytes of virtually contiguous memory that is bound to the
+ * partition id \a cpt.
+ */
+static inline void *
+cfs_cpt_vzalloc(struct cfs_cpt_table *cptab, int cpt, size_t nr_bytes)
+{
+	/* vzalloc_node() sets __GFP_FS by default, and no currently
+	 * exported kernel entry point allows both a NUMA node
+	 * specification and a custom allocation flags mask. This may be
+	 * an issue, since __GFP_FS usage can cause deadlocks in our code
+	 * when memory reclaim, started within the same context as a
+	 * thread doing FS operations, attempts conflicting FS operations
+	 * of its own.
+	 */
+	return vzalloc_node(nr_bytes, cfs_cpt_spread_node(cptab, cpt));
+}
+
+/**
+ * allocate a single page of memory with the properties of \a flags, where
+ * that page is bound to the partition id \a cpt.
+ */
+static inline struct page *
+cfs_page_cpt_alloc(struct cfs_cpt_table *cptab, int cpt, gfp_t flags)
+{
+	return alloc_pages_node(cfs_cpt_spread_node(cptab, cpt), flags, 0);
+}
+
+/**
+ * allocate a chunk of memory from a memory pool that is bound to the
+ * partition id \a cpt with the properties of \a flags.
+ */
+static inline void *
+cfs_mem_cache_cpt_alloc(struct kmem_cache *cachep, struct cfs_cpt_table *cptab,
+			int cpt, gfp_t flags)
+{
+	return kmem_cache_alloc_node(cachep, flags,
+				     cfs_cpt_spread_node(cptab, cpt));
+}
+
+/**
  * iterate over all CPU partitions in \a cptab
  */
 #define cfs_cpt_for_each(i, cptab)		\
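
The declarations above only show the per-CPU-partition data API; the following is a minimal usage sketch of cfs_percpt_alloc()/cfs_percpt_for_each(), assuming a kernel module that already holds a valid partition table (for example the global cfs_cpt_table set up by libcfs). The type "struct my_counter" and the function names are hypothetical and used only for illustration.

#include <libcfs/libcfs.h>

struct my_counter {
	__u64	mc_events;	/* per-partition event count (hypothetical) */
};

static struct my_counter **my_counters;

static int my_counters_init(struct cfs_cpt_table *cptab)
{
	struct my_counter *cnt;
	int i;

	/* one element per CPU partition, each allocated on the NUMA
	 * node(s) backing that partition */
	my_counters = cfs_percpt_alloc(cptab, sizeof(*cnt));
	if (my_counters == NULL)
		return -ENOMEM;

	/* walk every per-partition element */
	cfs_percpt_for_each(cnt, i, my_counters)
		cnt->mc_events = 0;

	return 0;
}

static void my_counters_fini(void)
{
	if (my_counters != NULL)
		cfs_percpt_free(my_counters);
}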
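The locking pattern described in the "percpu partition lock" comment block can be sketched the same way. The lock "my_lock" and the two update functions below are hypothetical; CFS_PERCPT_LOCK_EX is the exclusive index declared in this header and, per the comment block, taking it is expected to acquire every private lock.

#include <libcfs/libcfs.h>

static struct cfs_percpt_lock *my_lock;

static int my_lock_init(struct cfs_cpt_table *cptab)
{
	/* cfs_percpt_lock_alloc() supplies static lockdep class keys */
	my_lock = cfs_percpt_lock_alloc(cptab);
	return my_lock != NULL ? 0 : -ENOMEM;
}

static void my_private_update(int cpt)
{
	/* frequent, fast path: only this partition's private lock is taken */
	cfs_percpt_lock(my_lock, cpt);
	/* ... modify data owned by partition "cpt" ... */
	cfs_percpt_unlock(my_lock, cpt);
}

static void my_shared_update(void)
{
	/* rare, slow path: the exclusive index takes all private locks,
	 * serializing against every partition */
	cfs_percpt_lock(my_lock, CFS_PERCPT_LOCK_EX);
	/* ... modify data shared by all partitions ... */
	cfs_percpt_unlock(my_lock, CFS_PERCPT_LOCK_EX);
}

static void my_lock_fini(void)
{
	if (my_lock != NULL)
		cfs_percpt_lock_free(my_lock);
}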
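Finally, the CPT-aware allocators can be combined so that small buffers stay physically contiguous while large ones fall back to vmalloc space, both bound to the NUMA node(s) of the chosen partition. The helper below and its size threshold are hypothetical; the caller has to remember which path was taken so it can free with kfree() or vfree() accordingly.

#include <libcfs/libcfs.h>

static void *my_cpt_buf_alloc(struct cfs_cpt_table *cptab, int cpt,
			      size_t size)
{
	/* small buffers: physically contiguous, NUMA-bound kmalloc */
	if (size <= 4 * PAGE_SIZE)
		return cfs_cpt_malloc(cptab, cpt, size,
				      GFP_NOFS | __GFP_ZERO);

	/* large buffers: virtually contiguous; note cfs_cpt_vzalloc()
	 * cannot mask out __GFP_FS (see the comment in its body) */
	return cfs_cpt_vzalloc(cptab, cpt, size);
}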