#include <linux/cpu.h>
#include <linux/sched.h>
-#include <libcfs/libcfs_cpu.h>
#include <libcfs/libcfs.h>
+#include <libcfs/libcfs_cpu.h>
+
+/** CPU partition: one group of CPUs/NUMA nodes within a cfs_cpt_table */
+struct cfs_cpu_partition {
+ /* CPUs mask for this partition */
+ cpumask_var_t cpt_cpumask;
+ /* nodes mask for this partition */
+ nodemask_t *cpt_nodemask;
+ /* NUMA distance between CPTs */
+ unsigned int *cpt_distance;
+ /* spread rotor for NUMA allocator */
+ unsigned int cpt_spread_rotor;
+ /* NUMA node if cpt_nodemask is empty */
+ int cpt_node;
+};
+
+/** descriptor for CPU partitions */
+struct cfs_cpt_table {
+ /* spread rotor for NUMA allocator */
+ unsigned int ctb_spread_rotor;
+ /* maximum NUMA distance between all nodes in table */
+ unsigned int ctb_distance;
+ /* # of CPU partitions */
+ int ctb_nparts;
+ /* partitions tables */
+ struct cfs_cpu_partition *ctb_parts;
+ /* shadow HW CPU to CPU partition ID */
+ int *ctb_cpu2cpt;
+ /* all cpus in this partition table */
+ cpumask_var_t ctb_cpumask;
+ /* shadow HW node to CPU partition ID */
+ int *ctb_node2cpt;
+ /* all nodes in this partition table */
+ nodemask_t *ctb_nodemask;
+};
/** Global CPU partition table */
-struct cfs_cpt_table *cfs_cpt_table __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_table);
+struct cfs_cpt_table *cfs_cpt_tab __read_mostly;
+EXPORT_SYMBOL(cfs_cpt_tab);
/**
* modparam for setting number of partitions
module_param(cpu_pattern, charp, 0444);
MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
-#ifdef CONFIG_SMP
struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
{
struct cfs_cpt_table *cptab;
cptab->ctb_nparts = ncpt;
- LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
- if (!cptab->ctb_cpumask)
+ if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS))
goto failed_alloc_cpumask;
LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
if (!cptab->ctb_nodemask)
goto failed_alloc_nodemask;
- LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
if (!cptab->ctb_cpu2cpt)
goto failed_alloc_cpu2cpt;
memset(cptab->ctb_cpu2cpt, -1,
nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- LIBCFS_ALLOC(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
if (!cptab->ctb_node2cpt)
goto failed_alloc_node2cpt;
memset(cptab->ctb_node2cpt, -1,
nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_parts, ncpt);
if (!cptab->ctb_parts)
goto failed_alloc_ctb_parts;
for (i = 0; i < ncpt; i++) {
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
- LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
- if (!part->cpt_cpumask)
+ if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS))
goto failed_setting_ctb_parts;
LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
if (!part->cpt_nodemask)
goto failed_setting_ctb_parts;
- LIBCFS_ALLOC(part->cpt_distance,
- cptab->ctb_nparts * sizeof(part->cpt_distance[0]));
+ CFS_ALLOC_PTR_ARRAY(part->cpt_distance, cptab->ctb_nparts);
if (!part->cpt_distance)
goto failed_setting_ctb_parts;
sizeof(*part->cpt_nodemask));
}
- if (part->cpt_cpumask)
- LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+ free_cpumask_var(part->cpt_cpumask);
if (part->cpt_distance) {
- LIBCFS_FREE(part->cpt_distance,
- cptab->ctb_nparts *
- sizeof(part->cpt_distance[0]));
+ CFS_FREE_PTR_ARRAY(part->cpt_distance,
+ cptab->ctb_nparts);
}
}
- if (cptab->ctb_parts) {
- LIBCFS_FREE(cptab->ctb_parts,
- cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
- }
+ if (cptab->ctb_parts)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_parts, cptab->ctb_nparts);
+
failed_alloc_ctb_parts:
- if (cptab->ctb_node2cpt) {
- LIBCFS_FREE(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- }
+ if (cptab->ctb_node2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
+
failed_alloc_node2cpt:
- if (cptab->ctb_cpu2cpt) {
- LIBCFS_FREE(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- }
+ if (cptab->ctb_cpu2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
+
failed_alloc_cpu2cpt:
if (cptab->ctb_nodemask)
LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
failed_alloc_nodemask:
- if (cptab->ctb_cpumask)
- LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+ free_cpumask_var(cptab->ctb_cpumask);
failed_alloc_cpumask:
LIBCFS_FREE(cptab, sizeof(*cptab));
return NULL;
{
int i;
- if (cptab->ctb_cpu2cpt) {
- LIBCFS_FREE(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- }
+ if (cptab->ctb_cpu2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
- if (cptab->ctb_node2cpt) {
- LIBCFS_FREE(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- }
+ if (cptab->ctb_node2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
sizeof(*part->cpt_nodemask));
}
- if (part->cpt_cpumask)
- LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+ free_cpumask_var(part->cpt_cpumask);
- if (part->cpt_distance) {
- LIBCFS_FREE(part->cpt_distance,
- cptab->ctb_nparts *
- sizeof(part->cpt_distance[0]));
- }
+ if (part->cpt_distance)
+ CFS_FREE_PTR_ARRAY(part->cpt_distance,
+ cptab->ctb_nparts);
}
- if (cptab->ctb_parts) {
- LIBCFS_FREE(cptab->ctb_parts,
- cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
- }
+ if (cptab->ctb_parts)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_parts, cptab->ctb_nparts);
if (cptab->ctb_nodemask)
LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
- if (cptab->ctb_cpumask)
- LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+ free_cpumask_var(cptab->ctb_cpumask);
LIBCFS_FREE(cptab, sizeof(*cptab));
}
}
EXPORT_SYMBOL(cfs_cpt_online);
-cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
{
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
return cpt == CFS_CPT_ANY ?
- cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
+ &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
}
EXPORT_SYMBOL(cfs_cpt_cpumask);
{
nodemask_t *mask;
int weight;
- int rotor;
+ unsigned int rotor;
int node = 0;
/* convert CPU partition ID to HW node id */
static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
cpumask_t *node_mask, int number)
{
- cpumask_t *socket_mask = NULL;
- cpumask_t *core_mask = NULL;
+ cpumask_var_t socket_mask;
+ cpumask_var_t core_mask;
int rc = 0;
int cpu;
int i;
return 0;
}
- /* allocate scratch buffer */
- LIBCFS_ALLOC(socket_mask, cpumask_size());
- LIBCFS_ALLOC(core_mask, cpumask_size());
- if (!socket_mask || !core_mask) {
+ /*
+ * Allocate both scratch buffers up front: a cpumask_var_t
+ * cannot be pre-initialized to a known-safe empty state, so
+ * both allocations must be attempted before the shared error
+ * path can safely free_cpumask_var() either of them.
+ */
+ if (!zalloc_cpumask_var(&socket_mask, GFP_NOFS))
+ rc = -ENOMEM;
+ if (!zalloc_cpumask_var(&core_mask, GFP_NOFS))
rc = -ENOMEM;
+ if (rc)
goto out;
- }
while (!cpumask_empty(node_mask)) {
cpu = cpumask_first(node_mask);
}
out:
- if (core_mask)
- LIBCFS_FREE(core_mask, cpumask_size());
- if (socket_mask)
- LIBCFS_FREE(socket_mask, cpumask_size());
+ free_cpumask_var(socket_mask);
+ free_cpumask_var(core_mask);
return rc;
}
-#define CPT_WEIGHT_MIN 4
+#define CPT_WEIGHT_MIN 4u
-static int cfs_cpt_num_estimate(void)
+static unsigned int cfs_cpt_num_estimate(void)
{
- int nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
- int ncpu = num_online_cpus();
- int ncpt = 1;
+ unsigned int nthr;
+ unsigned int ncpu = num_online_cpus();
+ unsigned int ncpt = 1;
+
+ preempt_disable();
+ nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
+ preempt_enable();
if (ncpu > CPT_WEIGHT_MIN)
for (ncpt = 2; ncpu > 2 * nthr * ncpt; ncpt++)
/* config many CPU partitions on 32-bit system could consume
* too much memory
*/
- ncpt = min(2, ncpt);
+ ncpt = min(2U, ncpt);
#endif
while (ncpu % ncpt)
ncpt--; /* worst case is 1 */
static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
{
struct cfs_cpt_table *cptab = NULL;
- cpumask_t *node_mask = NULL;
+ cpumask_var_t node_mask;
int cpt = 0;
int node;
int num;
if (ncpt <= 0)
ncpt = num;
- if (ncpt > num_online_cpus() || ncpt > 4 * num) {
+ if (ncpt > num_online_cpus()) {
+ rc = -EINVAL;
+ CERROR("libcfs: CPU partition count %d > cores %d: rc = %d\n",
+ ncpt, num_online_cpus(), rc);
+ goto failed;
+ }
+
+ if (ncpt > 4 * num) {
CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
ncpt, num);
}
goto failed;
}
- LIBCFS_ALLOC(node_mask, cpumask_size());
- if (!node_mask) {
+ if (!zalloc_cpumask_var(&node_mask, GFP_NOFS)) {
CERROR("Failed to allocate scratch cpumask\n");
rc = -ENOMEM;
goto failed;
int ncpu = cpumask_weight(part->cpt_cpumask);
rc = cfs_cpt_choose_ncpus(cptab, cpt, node_mask,
- num - ncpu);
+ (rem > 0) + num - ncpu);
if (rc < 0) {
rc = -EINVAL;
goto failed_mask;
}
}
- LIBCFS_FREE(node_mask, cpumask_size());
+ free_cpumask_var(node_mask);
return cptab;
failed_mask:
- if (node_mask)
- LIBCFS_FREE(node_mask, cpumask_size());
+ free_cpumask_var(node_mask);
failed:
CERROR("Failed (rc = %d) to setup CPU partition table with %d partitions, online HW NUMA nodes: %d, HW CPU cores: %d.\n",
rc, ncpt, num_online_nodes(), num_online_cpus());
void cfs_cpu_fini(void)
{
- if (!IS_ERR_OR_NULL(cfs_cpt_table))
- cfs_cpt_table_free(cfs_cpt_table);
+ if (!IS_ERR_OR_NULL(cfs_cpt_tab))
+ cfs_cpt_table_free(cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
{
int ret;
- LASSERT(!cfs_cpt_table);
+ LASSERT(!cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
get_online_cpus();
if (*cpu_pattern) {
- cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
- if (IS_ERR(cfs_cpt_table)) {
+ cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern);
+ if (IS_ERR(cfs_cpt_tab)) {
CERROR("Failed to create cptab from pattern '%s'\n",
cpu_pattern);
- ret = PTR_ERR(cfs_cpt_table);
+ ret = PTR_ERR(cfs_cpt_tab);
goto failed_alloc_table;
}
} else {
- cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
- if (IS_ERR(cfs_cpt_table)) {
+ cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions);
+ if (IS_ERR(cfs_cpt_tab)) {
CERROR("Failed to create cptab with npartitions %d\n",
cpu_npartitions);
- ret = PTR_ERR(cfs_cpt_table);
+ ret = PTR_ERR(cfs_cpt_tab);
goto failed_alloc_table;
}
}
LCONSOLE(0, "HW NUMA nodes: %d, HW CPU cores: %d, npartitions: %d\n",
num_online_nodes(), num_online_cpus(),
- cfs_cpt_number(cfs_cpt_table));
+ cfs_cpt_number(cfs_cpt_tab));
return 0;
failed_alloc_table:
put_online_cpus();
- if (cfs_cpt_table)
- cfs_cpt_table_free(cfs_cpt_table);
+ if (!IS_ERR_OR_NULL(cfs_cpt_tab))
+ cfs_cpt_table_free(cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
#endif /* CONFIG_HOTPLUG_CPU */
return ret;
}
-
-#else /* ! CONFIG_SMP */
-
-struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
-{
- struct cfs_cpt_table *cptab;
-
- if (ncpt != 1) {
- CERROR("Can't support cpu partition number %d\n", ncpt);
- return NULL;
- }
-
- LIBCFS_ALLOC(cptab, sizeof(*cptab));
- if (!cptab)
- return NULL;
-
- cpumask_set_cpu(0, cptab->ctb_cpumask);
- node_set(0, cptab->ctb_nodemask);
-
- return cptab;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- int rc;
-
- rc = snprintf(buf, len, "0\t: 0\n");
- len -= rc;
- if (len <= 0)
- return -EFBIG;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- int rc;
-
- rc = snprintf(buf, len, "0\t: 0:1\n");
- len -= rc;
- if (len <= 0)
- return -EFBIG;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_distance_print);
-
-void cfs_cpu_fini(void)
-{
- if (cfs_cpt_table) {
- cfs_cpt_table_free(cfs_cpt_table);
- cfs_cpt_table = NULL;
- }
-}
-
-int cfs_cpu_init(void)
-{
- cfs_cpt_table = cfs_cpt_table_alloc(1);
-
- return cfs_cpt_table ? 0 : -1;
-}
-
-#endif /* !CONFIG_SMP */