X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=libcfs%2Flibcfs%2Flinux%2Flinux-cpu.c;h=e8cd2a75c89f37903b8c922cd73df3837880c6a5;hb=bd3023bf147a661e00394ddee0efc2b2ac2f7887;hp=63bec8c2d2f3305f5f644694ad91d23c4fa8bf12;hpb=40fe3cd7283dfd1cee5f989483c517601ac773f8;p=fs%2Flustre-release.git diff --git a/libcfs/libcfs/linux/linux-cpu.c b/libcfs/libcfs/linux/linux-cpu.c index 63bec8c..e8cd2a7 100644 --- a/libcfs/libcfs/linux/linux-cpu.c +++ b/libcfs/libcfs/linux/linux-cpu.c @@ -13,17 +13,12 @@ * General Public License version 2 for more details (a copy is included * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA - * * GPL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -47,7 +42,7 @@ * 1 : disable multiple partitions * >1 : specify number of partitions */ -static int cpu_npartitions; +static int cpu_npartitions; module_param(cpu_npartitions, int, 0444); MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); @@ -64,34 +59,33 @@ MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); * * NB: If user specified cpu_pattern, cpu_npartitions will be ignored */ -static char *cpu_pattern = "N"; +static char *cpu_pattern = "N"; module_param(cpu_pattern, charp, 0444); MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) +void cfs_cpt_table_free(struct cfs_cpt_table *cptab) { int i; - if (cptab->ctb_cpu2cpt != NULL) { + if (cptab->ctb_cpu2cpt) { LIBCFS_FREE(cptab->ctb_cpu2cpt, nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0])); } - if (cptab->ctb_node2cpt != NULL) { + if (cptab->ctb_node2cpt) { LIBCFS_FREE(cptab->ctb_node2cpt, nr_node_ids * sizeof(cptab->ctb_node2cpt[0])); } - for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) { + for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) { struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - if (part->cpt_nodemask != NULL) { + if (part->cpt_nodemask) { LIBCFS_FREE(part->cpt_nodemask, sizeof(*part->cpt_nodemask)); } - if (part->cpt_cpumask != NULL) + if (part->cpt_cpumask) LIBCFS_FREE(part->cpt_cpumask, cpumask_size()); if (part->cpt_distance) { @@ -101,28 +95,27 @@ cfs_cpt_table_free(struct cfs_cpt_table *cptab) } } - if (cptab->ctb_parts != NULL) { + if (cptab->ctb_parts) { LIBCFS_FREE(cptab->ctb_parts, cptab->ctb_nparts * sizeof(cptab->ctb_parts[0])); } - if (cptab->ctb_nodemask != NULL) + if (cptab->ctb_nodemask) LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); - if (cptab->ctb_cpumask != NULL) + if (cptab->ctb_cpumask) LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size()); LIBCFS_FREE(cptab, sizeof(*cptab)); } EXPORT_SYMBOL(cfs_cpt_table_free); -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) +struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt) { struct cfs_cpt_table *cptab; - int i; + int i; LIBCFS_ALLOC(cptab, sizeof(*cptab)); - if (cptab == NULL) + if (!cptab) return NULL; cptab->ctb_nparts = ncpt; @@ -130,12 +123,12 @@ cfs_cpt_table_alloc(unsigned int ncpt) LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size()); LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); - if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL) + if (!cptab->ctb_cpumask || !cptab->ctb_nodemask) goto failed; LIBCFS_ALLOC(cptab->ctb_cpu2cpt, nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0])); - if (cptab->ctb_cpu2cpt == NULL) + if (!cptab->ctb_cpu2cpt) goto failed; memset(cptab->ctb_cpu2cpt, -1, @@ -143,14 +136,14 @@ cfs_cpt_table_alloc(unsigned int ncpt) LIBCFS_ALLOC(cptab->ctb_node2cpt, nr_node_ids * sizeof(cptab->ctb_node2cpt[0])); - if (cptab->ctb_node2cpt == NULL) + if (!cptab->ctb_node2cpt) goto failed; memset(cptab->ctb_node2cpt, -1, nr_node_ids * sizeof(cptab->ctb_node2cpt[0])); LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0])); - if (cptab->ctb_parts == NULL) + if (!cptab->ctb_parts) goto failed; for (i = 0; i < ncpt; i++) { @@ -178,30 +171,29 @@ failed: } EXPORT_SYMBOL(cfs_cpt_table_alloc); -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) +int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) { - char *tmp = buf; - int rc = -EFBIG; - int i; - int j; + char *tmp = buf; + int rc; + int i; + int j; for (i = 0; i < cptab->ctb_nparts; i++) { if (len <= 0) - goto out; + goto err; rc = snprintf(tmp, len, "%d\t:", i); len -= rc; if (len <= 0) - goto out; + goto err; tmp += rc; for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { rc = snprintf(tmp, len, " %d", j); len -= rc; if (len <= 0) - goto out; + goto err; tmp += rc; } @@ -209,40 +201,38 @@ cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) tmp++; len--; } - rc = 0; - out: - if (rc < 0) - return rc; return tmp - buf; + +err: + return -E2BIG; } EXPORT_SYMBOL(cfs_cpt_table_print); -int -cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len) +int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len) { - char *tmp = buf; - int rc = -EFBIG; - int i; - int j; + char *tmp = buf; + int rc; + int i; + int j; for (i = 0; i < cptab->ctb_nparts; i++) { if (len <= 0) - goto out; + goto err; rc = snprintf(tmp, len, "%d\t:", i); len -= rc; if (len <= 0) - goto out; + goto err; tmp += rc; for (j = 0; j < cptab->ctb_nparts; j++) { rc = snprintf(tmp, len, " %d:%d", - j, cptab->ctb_parts[i].cpt_distance[j]); + j, cptab->ctb_parts[i].cpt_distance[j]); len -= rc; if (len <= 0) - goto out; + goto err; tmp += rc; } @@ -250,24 +240,21 @@ cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len) tmp++; len--; } - rc = 0; - out: - if (rc < 0) - return rc; return tmp - buf; + +err: + return -E2BIG; } EXPORT_SYMBOL(cfs_cpt_distance_print); -int -cfs_cpt_number(struct cfs_cpt_table *cptab) +int cfs_cpt_number(struct cfs_cpt_table *cptab) { return cptab->ctb_nparts; } EXPORT_SYMBOL(cfs_cpt_number); -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) +int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) { LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -277,8 +264,7 @@ cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) } EXPORT_SYMBOL(cfs_cpt_weight); -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) +int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) { LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -290,8 +276,7 @@ cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) } EXPORT_SYMBOL(cfs_cpt_online); -cpumask_t * -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) +cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) { LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -300,8 +285,7 @@ cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) } EXPORT_SYMBOL(cfs_cpt_cpumask); -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) +nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) { LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -310,8 +294,7 @@ cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) } EXPORT_SYMBOL(cfs_cpt_nodemask); -unsigned -cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2) +unsigned int cfs_cpt_distance(struct cfs_cpt_table *cptab, int cpt1, int cpt2) { LASSERT(cpt1 == CFS_CPT_ANY || (cpt1 >= 0 && cpt1 < cptab->ctb_nparts)); LASSERT(cpt2 == CFS_CPT_ANY || (cpt2 >= 0 && cpt2 < cptab->ctb_nparts)); @@ -327,13 +310,13 @@ EXPORT_SYMBOL(cfs_cpt_distance); * Calculate the maximum NUMA distance between all nodes in the * from_mask and all nodes in the to_mask. */ -static unsigned -cfs_cpt_distance_calculate(nodemask_t *from_mask, nodemask_t *to_mask) +static unsigned int cfs_cpt_distance_calculate(nodemask_t *from_mask, + nodemask_t *to_mask) { - unsigned maximum; - unsigned distance; - int to; + unsigned int maximum; + unsigned int distance; int from; + int to; maximum = 0; for_each_node_mask(from, *from_mask) { @@ -364,43 +347,45 @@ static void cfs_cpt_del_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) static void cfs_cpt_add_node(struct cfs_cpt_table *cptab, int cpt, int node) { - int cpt2; struct cfs_cpu_partition *part; - struct cfs_cpu_partition *part2; if (!node_isset(node, *cptab->ctb_nodemask)) { + unsigned int dist; + /* first time node is added to the CPT table */ node_set(node, *cptab->ctb_nodemask); cptab->ctb_node2cpt[node] = cpt; - cptab->ctb_distance = cfs_cpt_distance_calculate( - cptab->ctb_nodemask, - cptab->ctb_nodemask); + + dist = cfs_cpt_distance_calculate(cptab->ctb_nodemask, + cptab->ctb_nodemask); + cptab->ctb_distance = dist; } part = &cptab->ctb_parts[cpt]; if (!node_isset(node, *part->cpt_nodemask)) { + int cpt2; + /* first time node is added to this CPT */ node_set(node, *part->cpt_nodemask); for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) { + struct cfs_cpu_partition *part2; + unsigned int dist; + part2 = &cptab->ctb_parts[cpt2]; - part->cpt_distance[cpt2] = cfs_cpt_distance_calculate( - part->cpt_nodemask, - part2->cpt_nodemask); - part2->cpt_distance[cpt] = cfs_cpt_distance_calculate( - part2->cpt_nodemask, - part->cpt_nodemask); + dist = cfs_cpt_distance_calculate(part->cpt_nodemask, + part2->cpt_nodemask); + part->cpt_distance[cpt2] = dist; + dist = cfs_cpt_distance_calculate(part2->cpt_nodemask, + part->cpt_nodemask); + part2->cpt_distance[cpt] = dist; } } } static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node) { + struct cfs_cpu_partition *part = &cptab->ctb_parts[cpt]; int cpu; - int cpt2; - struct cfs_cpu_partition *part; - struct cfs_cpu_partition *part2; - - part = &cptab->ctb_parts[cpt]; for_each_cpu(cpu, part->cpt_cpumask) { /* this CPT has other CPU belonging to this node? */ @@ -409,18 +394,24 @@ static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node) } if (cpu >= nr_cpu_ids && node_isset(node, *part->cpt_nodemask)) { + int cpt2; + /* No more CPUs in the node for this CPT. */ node_clear(node, *part->cpt_nodemask); for (cpt2 = 0; cpt2 < cptab->ctb_nparts; cpt2++) { + struct cfs_cpu_partition *part2; + unsigned int dist; + part2 = &cptab->ctb_parts[cpt2]; if (node_isset(node, *part2->cpt_nodemask)) cptab->ctb_node2cpt[node] = cpt2; - part->cpt_distance[cpt2] = cfs_cpt_distance_calculate( - part->cpt_nodemask, - part2->cpt_nodemask); - part2->cpt_distance[cpt] = cfs_cpt_distance_calculate( - part2->cpt_nodemask, - part->cpt_nodemask); + + dist = cfs_cpt_distance_calculate(part->cpt_nodemask, + part2->cpt_nodemask); + part->cpt_distance[cpt2] = dist; + dist = cfs_cpt_distance_calculate(part2->cpt_nodemask, + part->cpt_nodemask); + part2->cpt_distance[cpt] = dist; } } @@ -436,12 +427,11 @@ static void cfs_cpt_del_node(struct cfs_cpt_table *cptab, int cpt, int node) cptab->ctb_node2cpt[node] = -1; cptab->ctb_distance = cfs_cpt_distance_calculate(cptab->ctb_nodemask, - cptab->ctb_nodemask); + cptab->ctb_nodemask); } } -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) +int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) { LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); @@ -456,8 +446,16 @@ cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) return 0; } - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); + if (cpumask_test_cpu(cpu, cptab->ctb_cpumask)) { + CDEBUG(D_INFO, "CPU %d is already in cpumask\n", cpu); + return 0; + } + + if (cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)) { + CDEBUG(D_INFO, "CPU %d is already in partition %d cpumask\n", + cpu, cptab->ctb_cpu2cpt[cpu]); + return 0; + } cfs_cpt_add_cpu(cptab, cpt, cpu); cfs_cpt_add_node(cptab, cpt, cpu_to_node(cpu)); @@ -466,8 +464,7 @@ cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) } EXPORT_SYMBOL(cfs_cpt_set_cpu); -void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) +void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) { LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -480,14 +477,15 @@ cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) /* caller doesn't know the partition ID */ cpt = cptab->ctb_cpu2cpt[cpu]; if (cpt < 0) { /* not set in this CPT-table */ - CDEBUG(D_INFO, "Try to unset cpu %d which is " - "not in CPT-table %p\n", cpt, cptab); + CDEBUG(D_INFO, + "Try to unset cpu %d which is not in CPT-table %p\n", + cpt, cptab); return; } } else if (cpt != cptab->ctb_cpu2cpt[cpu]) { CDEBUG(D_INFO, - "CPU %d is not in cpu-partition %d\n", cpu, cpt); + "CPU %d is not in CPU partition %d\n", cpu, cpt); return; } @@ -499,15 +497,16 @@ cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) } EXPORT_SYMBOL(cfs_cpt_unset_cpu); -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, const cpumask_t *mask) +int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, + const cpumask_t *mask) { int cpu; - if (cpumask_weight(mask) == 0 || + if (!cpumask_weight(mask) || cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { - CDEBUG(D_INFO, "No online CPU is found in the CPU mask " - "for CPU partition %d\n", cpt); + CDEBUG(D_INFO, + "No online CPU is found in the CPU mask for CPU partition %d\n", + cpt); return 0; } @@ -520,22 +519,22 @@ cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, const cpumask_t *mask) } EXPORT_SYMBOL(cfs_cpt_set_cpumask); -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, - const cpumask_t *mask) +void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, + const cpumask_t *mask) { int cpu; - for_each_cpu(cpu, mask) - cfs_cpt_unset_cpu(cptab, cpt, cpu); + for_each_cpu(cpu, mask) { + cfs_cpt_del_cpu(cptab, cpt, cpu); + cfs_cpt_del_node(cptab, cpt, cpu_to_node(cpu)); + } } EXPORT_SYMBOL(cfs_cpt_unset_cpumask); -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) +int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) { const cpumask_t *mask; - int cpu; + int cpu; if (node < 0 || node >= nr_node_ids) { CDEBUG(D_INFO, @@ -554,8 +553,7 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) } EXPORT_SYMBOL(cfs_cpt_set_node); -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) +void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) { const cpumask_t *mask; int cpu; @@ -575,36 +573,34 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) } EXPORT_SYMBOL(cfs_cpt_unset_node); -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, + const nodemask_t *mask) { - int i; + int node; - for_each_node_mask(i, *mask) { - if (!cfs_cpt_set_node(cptab, cpt, i)) - return 0; - } + for_each_node_mask(node, *mask) + cfs_cpt_set_node(cptab, cpt, node); return 1; } EXPORT_SYMBOL(cfs_cpt_set_nodemask); -void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, + const nodemask_t *mask) { - int i; + int node; - for_each_node_mask(i, *mask) - cfs_cpt_unset_node(cptab, cpt, i); + for_each_node_mask(node, *mask) + cfs_cpt_unset_node(cptab, cpt, node); } EXPORT_SYMBOL(cfs_cpt_unset_nodemask); int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) { - nodemask_t *mask; - int weight; - int rotor; - int node; + nodemask_t *mask; + int weight; + int rotor; + int node = 0; /* convert CPU partition ID to HW node id */ @@ -614,35 +610,35 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) } else { mask = cptab->ctb_parts[cpt].cpt_nodemask; rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; + node = cptab->ctb_parts[cpt].cpt_node; } weight = nodes_weight(*mask); - LASSERT(weight > 0); + if (weight > 0) { + rotor %= weight; - rotor %= weight; - - for_each_node_mask(node, *mask) { - if (rotor-- == 0) - return node; + for_each_node_mask(node, *mask) { + if (!rotor--) + return node; + } } - LBUG(); - return 0; + return node; } EXPORT_SYMBOL(cfs_cpt_spread_node); -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) +int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) { - int cpu = smp_processor_id(); - int cpt = cptab->ctb_cpu2cpt[cpu]; + int cpu = smp_processor_id(); + int cpt = cptab->ctb_cpu2cpt[cpu]; if (cpt < 0) { if (!remap) return cpt; /* don't return negative value for safety of upper layer, - * instead we shadow the unknown cpu to a valid partition ID */ + * instead we shadow the unknown cpu to a valid partition ID + */ cpt = cpu % cptab->ctb_nparts; } @@ -650,8 +646,7 @@ cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) } EXPORT_SYMBOL(cfs_cpt_current); -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) +int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) { LASSERT(cpu >= 0 && cpu < nr_cpu_ids); @@ -659,8 +654,7 @@ cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) } EXPORT_SYMBOL(cfs_cpt_of_cpu); -int -cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node) +int cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node) { if (node < 0 || node > nr_node_ids) return CFS_CPT_ANY; @@ -669,13 +663,12 @@ cfs_cpt_of_node(struct cfs_cpt_table *cptab, int node) } EXPORT_SYMBOL(cfs_cpt_of_node); -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) +int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) { - cpumask_t *cpumask; - nodemask_t *nodemask; - int rc; - int i; + nodemask_t *nodemask; + cpumask_t *cpumask; + int cpu; + int rc; LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -687,20 +680,20 @@ cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) nodemask = cptab->ctb_parts[cpt].cpt_nodemask; } - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) { - CERROR("No online CPU found in CPU partition %d, did someone " - "do CPU hotplug on system? You might need to reload " - "Lustre modules to keep system working well.\n", cpt); - return -EINVAL; + if (!cpumask_intersects(cpumask, cpu_online_mask)) { + CDEBUG(D_INFO, + "No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", + cpt); + return -ENODEV; } - for_each_online_cpu(i) { - if (cpumask_test_cpu(i, cpumask)) + for_each_online_cpu(cpu) { + if (cpumask_test_cpu(cpu, cpumask)) continue; rc = set_cpus_allowed_ptr(current, cpumask); set_mems_allowed(*nodemask); - if (rc == 0) + if (!rc) schedule(); /* switch to allowed CPU */ return rc; @@ -715,58 +708,56 @@ EXPORT_SYMBOL(cfs_cpt_bind); * Choose max to \a number CPUs from \a node and set them in \a cpt. * We always prefer to choose CPU in the same core/socket. */ -static int -cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, - cpumask_t *node, int number) +static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, + cpumask_t *node_mask, int number) { - cpumask_t *socket = NULL; - cpumask_t *core = NULL; - int rc = 0; - int cpu; + cpumask_t *socket_mask = NULL; + cpumask_t *core_mask = NULL; + int rc = 0; + int cpu; + int i; LASSERT(number > 0); - if (number >= cpumask_weight(node)) { - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); + if (number >= cpumask_weight(node_mask)) { + while (!cpumask_empty(node_mask)) { + cpu = cpumask_first(node_mask); + cpumask_clear_cpu(cpu, node_mask); + + if (!cpu_online(cpu)) + continue; rc = cfs_cpt_set_cpu(cptab, cpt, cpu); if (!rc) return -EINVAL; - cpumask_clear_cpu(cpu, node); } return 0; } /* allocate scratch buffer */ - LIBCFS_ALLOC(socket, cpumask_size()); - LIBCFS_ALLOC(core, cpumask_size()); - if (socket == NULL || core == NULL) { + LIBCFS_ALLOC(socket_mask, cpumask_size()); + LIBCFS_ALLOC(core_mask, cpumask_size()); + if (!socket_mask || !core_mask) { rc = -ENOMEM; goto out; } - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); + while (!cpumask_empty(node_mask)) { + cpu = cpumask_first(node_mask); /* get cpumask for cores in the same socket */ - cpumask_copy(socket, topology_core_cpumask(cpu)); - cpumask_and(socket, socket, node); - - LASSERT(!cpumask_empty(socket)); - - while (!cpumask_empty(socket)) { - int i; - + cpumask_and(socket_mask, topology_core_cpumask(cpu), node_mask); + while (!cpumask_empty(socket_mask)) { /* get cpumask for hts in the same core */ - cpumask_copy(core, topology_sibling_cpumask(cpu)); - cpumask_and(core, core, node); + cpumask_and(core_mask, topology_sibling_cpumask(cpu), + node_mask); - LASSERT(!cpumask_empty(core)); + for_each_cpu(i, core_mask) { + cpumask_clear_cpu(i, socket_mask); + cpumask_clear_cpu(i, node_mask); - for_each_cpu(i, core) { - cpumask_clear_cpu(i, socket); - cpumask_clear_cpu(i, node); + if (!cpu_online(i)) + continue; rc = cfs_cpt_set_cpu(cptab, cpt, i); if (!rc) { @@ -774,294 +765,270 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, goto out; } - if (--number == 0) + if (!--number) goto out; } - cpu = cpumask_first(socket); + cpu = cpumask_first(socket_mask); } } out: - if (socket != NULL) - LIBCFS_FREE(socket, cpumask_size()); - if (core != NULL) - LIBCFS_FREE(core, cpumask_size()); + if (core_mask) + LIBCFS_FREE(core_mask, cpumask_size()); + if (socket_mask) + LIBCFS_FREE(socket_mask, cpumask_size()); return rc; } -#define CPT_WEIGHT_MIN 4u +#define CPT_WEIGHT_MIN 4 -static unsigned int -cfs_cpt_num_estimate(void) +static int cfs_cpt_num_estimate(void) { - unsigned nnode = num_online_nodes(); - unsigned ncpu = num_online_cpus(); - unsigned ncpt; + int nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id())); + int ncpu = num_online_cpus(); + int ncpt = 1; - if (ncpu <= CPT_WEIGHT_MIN) { - ncpt = 1; - goto out; - } + if (ncpu > CPT_WEIGHT_MIN) + for (ncpt = 2; ncpu > 2 * nthr * ncpt; ncpt++) + ; /* nothing */ - /* generate reasonable number of CPU partitions based on total number - * of CPUs, Preferred N should be power2 and match this condition: - * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 */ - for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) {} - - if (ncpt <= nnode) { /* fat numa system */ - while (nnode > ncpt) - nnode >>= 1; - - } else { /* ncpt > nnode */ - while ((nnode << 1) <= ncpt) - nnode <<= 1; - } - - ncpt = nnode; - -out: #if (BITS_PER_LONG == 32) /* config many CPU partitions on 32-bit system could consume - * too much memory */ - ncpt = min(2U, ncpt); + * too much memory + */ + ncpt = min(2, ncpt); #endif - while (ncpu % ncpt != 0) + while (ncpu % ncpt) ncpt--; /* worst case is 1 */ return ncpt; } -static struct cfs_cpt_table * -cfs_cpt_table_create(int ncpt) +static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt) { struct cfs_cpt_table *cptab = NULL; - cpumask_t *mask = NULL; - int cpt = 0; - int num; - int rc; - int i; - - rc = cfs_cpt_num_estimate(); + cpumask_t *node_mask = NULL; + int cpt = 0; + int node; + int num; + int rem; + int rc = 0; + + num = cfs_cpt_num_estimate(); if (ncpt <= 0) - ncpt = rc; + ncpt = num; - if (ncpt > num_online_cpus() || ncpt > 4 * rc) { - CWARN("CPU partition number %d is larger than suggested " - "value (%d), your system may have performance" - "issue or run out of memory while under pressure\n", - ncpt, rc); - } - - if (num_online_cpus() % ncpt != 0) { - CERROR("CPU number %d is not multiple of cpu_npartition %d, " - "please try different cpu_npartitions value or" - "set pattern string by cpu_pattern=STRING\n", - (int)num_online_cpus(), ncpt); - goto failed; + if (ncpt > num_online_cpus() || ncpt > 4 * num) { + CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", + ncpt, num); } cptab = cfs_cpt_table_alloc(ncpt); - if (cptab == NULL) { + if (!cptab) { CERROR("Failed to allocate CPU map(%d)\n", ncpt); + rc = -ENOMEM; goto failed; } - num = num_online_cpus() / ncpt; - if (num == 0) { - CERROR("CPU changed while setting CPU partition\n"); - goto failed; - } - - LIBCFS_ALLOC(mask, cpumask_size()); - if (mask == NULL) { + LIBCFS_ALLOC(node_mask, cpumask_size()); + if (!node_mask) { CERROR("Failed to allocate scratch cpumask\n"); + rc = -ENOMEM; goto failed; } - for_each_online_node(i) { - cpumask_copy(mask, cpumask_of_node(i)); - - while (!cpumask_empty(mask)) { - struct cfs_cpu_partition *part; - int n; - - /* Each emulated NUMA node has all allowed CPUs in - * the mask. - * End loop when all partitions have assigned CPUs. - */ - if (cpt == ncpt) - break; - - part = &cptab->ctb_parts[cpt]; - - n = num - cpumask_weight(part->cpt_cpumask); - LASSERT(n > 0); - - rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); - if (rc < 0) - goto failed; + num = num_online_cpus() / ncpt; + rem = num_online_cpus() % ncpt; + for_each_online_node(node) { + cpumask_copy(node_mask, cpumask_of_node(node)); + + while (cpt < ncpt && !cpumask_empty(node_mask)) { + struct cfs_cpu_partition *part = &cptab->ctb_parts[cpt]; + int ncpu = cpumask_weight(part->cpt_cpumask); + + rc = cfs_cpt_choose_ncpus(cptab, cpt, node_mask, + num - ncpu); + if (rc < 0) { + rc = -EINVAL; + goto failed_mask; + } - LASSERT(num >= cpumask_weight(part->cpt_cpumask)); - if (num == cpumask_weight(part->cpt_cpumask)) + ncpu = cpumask_weight(part->cpt_cpumask); + if (ncpu == num + !!(rem > 0)) { cpt++; + rem--; + } } } - if (cpt != ncpt || - num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { - CERROR("Expect %d(%d) CPU partitions but got %d(%d), " - "CPU hotplug/unplug while setting?\n", - cptab->ctb_nparts, num, cpt, - cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); - goto failed; - } - - LIBCFS_FREE(mask, cpumask_size()); + LIBCFS_FREE(node_mask, cpumask_size()); return cptab; - failed: - CERROR("Failed to setup CPU-partition-table with %d " - "CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", - ncpt, num_online_nodes(), num_online_cpus()); - - if (mask != NULL) - LIBCFS_FREE(mask, cpumask_size()); +failed_mask: + if (node_mask) + LIBCFS_FREE(node_mask, cpumask_size()); +failed: + CERROR("Failed (rc = %d) to setup CPU partition table with %d partitions, online HW NUMA nodes: %d, HW CPU cores: %d.\n", + rc, ncpt, num_online_nodes(), num_online_cpus()); - if (cptab != NULL) + if (cptab) cfs_cpt_table_free(cptab); - return NULL; + return ERR_PTR(rc); } -static struct cfs_cpt_table * -cfs_cpt_table_create_pattern(char *pattern) +static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) { - struct cfs_cpt_table *cptab; - char *str; - int node = 0; - int ncpt = 0; - int high; - int cpt; - int rc; - int c; - int i; - - str = cfs_trimwhite(pattern); - if (*str == 'n' || *str == 'N') { - pattern = str + 1; - if (*pattern != '\0') { - node = 1; /* numa pattern */ + struct cfs_cpt_table *cptab; + char *pattern_dup; + char *bracket; + char *str; + int node = 0; + int ncpt = 0; + int cpt = 0; + int high; + int rc; + int c; + int i; + + pattern_dup = kstrdup(pattern, GFP_KERNEL); + if (!pattern_dup) { + CERROR("Failed to duplicate pattern '%s'\n", pattern); + return ERR_PTR(-ENOMEM); + } - } else { /* shortcut to create CPT from NUMA & CPU topology */ + str = cfs_trimwhite(pattern_dup); + if (*str == 'n' || *str == 'N') { + str++; /* skip 'N' char */ + node = 1; /* NUMA pattern */ + if (*str == '\0') { node = -1; - ncpt = num_online_nodes(); + for_each_online_node(i) { + if (!cpumask_empty(cpumask_of_node(i))) + ncpt++; + } + if (ncpt == 1) { /* single NUMA node */ + kfree(pattern_dup); + return cfs_cpt_table_create(cpu_npartitions); + } } } - if (ncpt == 0) { /* scanning bracket which is mark of partition */ - for (str = pattern;; str++, ncpt++) { - str = strchr(str, '['); - if (str == NULL) - break; + if (!ncpt) { /* scanning bracket which is mark of partition */ + bracket = str; + while ((bracket = strchr(bracket, '['))) { + bracket++; + ncpt++; } } - if (ncpt == 0 || + if (!ncpt || (node && ncpt > num_online_nodes()) || (!node && ncpt > num_online_cpus())) { - CERROR("Invalid pattern %s, or too many partitions %d\n", - pattern, ncpt); - return NULL; + CERROR("Invalid pattern '%s', or too many partitions %d\n", + pattern_dup, ncpt); + rc = -EINVAL; + goto err_free_str; } cptab = cfs_cpt_table_alloc(ncpt); - if (cptab == NULL) { - CERROR("Failed to allocate cpu partition table\n"); - return NULL; + if (!cptab) { + CERROR("Failed to allocate CPU partition table\n"); + rc = -ENOMEM; + goto err_free_str; } if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ - cpt = 0; for_each_online_node(i) { - if (cpt >= ncpt) { - CERROR("CPU changed while setting CPU " - "partition table, %d/%d\n", cpt, ncpt); - goto failed; - } + if (cpumask_empty(cpumask_of_node(i))) + continue; rc = cfs_cpt_set_node(cptab, cpt++, i); - if (!rc) - goto failed; + if (!rc) { + rc = -EINVAL; + goto err_free_table; + } } + kfree(pattern_dup); return cptab; } high = node ? nr_node_ids - 1 : nr_cpu_ids - 1; - for (str = cfs_trimwhite(pattern), c = 0;; c++) { - struct cfs_range_expr *range; - struct cfs_expr_list *el; - char *bracket = strchr(str, '['); - int n; - - if (bracket == NULL) { - if (*str != 0) { - CERROR("Invalid pattern %s\n", str); - goto failed; + for (str = cfs_trimwhite(str), c = 0; /* until break */; c++) { + struct cfs_range_expr *range; + struct cfs_expr_list *el; + int n; + + bracket = strchr(str, '['); + if (!bracket) { + if (*str) { + CERROR("Invalid pattern '%s'\n", str); + rc = -EINVAL; + goto err_free_table; } else if (c != ncpt) { - CERROR("expect %d partitions but found %d\n", + CERROR("Expect %d partitions but found %d\n", ncpt, c); - goto failed; + rc = -EINVAL; + goto err_free_table; } break; } if (sscanf(str, "%d%n", &cpt, &n) < 1) { - CERROR("Invalid cpu pattern %s\n", str); - goto failed; + CERROR("Invalid CPU pattern '%s'\n", str); + rc = -EINVAL; + goto err_free_table; } if (cpt < 0 || cpt >= ncpt) { CERROR("Invalid partition id %d, total partitions %d\n", cpt, ncpt); - goto failed; + rc = -EINVAL; + goto err_free_table; } - if (cfs_cpt_weight(cptab, cpt) != 0) { + if (cfs_cpt_weight(cptab, cpt)) { CERROR("Partition %d has already been set.\n", cpt); - goto failed; + rc = -EPERM; + goto err_free_table; } str = cfs_trimwhite(str + n); if (str != bracket) { - CERROR("Invalid pattern %s\n", str); - goto failed; + CERROR("Invalid pattern '%s'\n", str); + rc = -EINVAL; + goto err_free_table; } bracket = strchr(str, ']'); - if (bracket == NULL) { - CERROR("missing right bracket for cpt %d, %s\n", + if (!bracket) { + CERROR("Missing right bracket for partition %d in '%s'\n", cpt, str); - goto failed; + rc = -EINVAL; + goto err_free_table; } - if (cfs_expr_list_parse(str, (bracket - str) + 1, - 0, high, &el) != 0) { - CERROR("Can't parse number range: %s\n", str); - goto failed; + rc = cfs_expr_list_parse(str, (bracket - str) + 1, 0, high, + &el); + if (rc) { + CERROR("Can't parse number range in '%s'\n", str); + rc = -ERANGE; + goto err_free_table; } list_for_each_entry(range, &el->el_exprs, re_link) { for (i = range->re_lo; i <= range->re_hi; i++) { - if ((i - range->re_lo) % range->re_stride != 0) + if ((i - range->re_lo) % range->re_stride) continue; - rc = node ? cfs_cpt_set_node(cptab, cpt, i) : - cfs_cpt_set_cpu(cptab, cpt, i); + rc = node ? cfs_cpt_set_node(cptab, cpt, i) + : cfs_cpt_set_cpu(cptab, cpt, i); if (!rc) { cfs_expr_list_free(el); - goto failed; + rc = -EINVAL; + goto err_free_table; } } } @@ -1070,25 +1037,51 @@ cfs_cpt_table_create_pattern(char *pattern) if (!cfs_cpt_online(cptab, cpt)) { CERROR("No online CPU is found on partition %d\n", cpt); - goto failed; + rc = -ENODEV; + goto err_free_table; } str = cfs_trimwhite(bracket + 1); } + kfree(pattern_dup); return cptab; - failed: +err_free_table: cfs_cpt_table_free(cptab); - return NULL; +err_free_str: + kfree(pattern_dup); + return ERR_PTR(rc); } #ifdef CONFIG_HOTPLUG_CPU -static int -cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +#ifdef HAVE_HOTPLUG_STATE_MACHINE +static enum cpuhp_state lustre_cpu_online; + +static int cfs_cpu_online(unsigned int cpu) +{ + return 0; +} +#endif + +static int cfs_cpu_dead(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - bool warn; + bool warn; + + /* if all HTs in a core are offline, it may break affinity */ + warn = cpumask_any_and(topology_sibling_cpumask(cpu), + cpu_online_mask) >= nr_cpu_ids; + CDEBUG(warn ? D_WARNING : D_INFO, + "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n", + cpu); + return 0; +} + +#ifndef HAVE_HOTPLUG_STATE_MACHINE +static int cfs_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; switch (action) { case CPU_DEAD: @@ -1102,13 +1095,7 @@ cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) break; } - /* if all HTs in a core are offline, it may break affinity */ - warn = cpumask_any_and(topology_sibling_cpumask(cpu), - cpu_online_mask) >= nr_cpu_ids; - CDEBUG(warn ? D_WARNING : D_INFO, - "Lustre: can't support CPU plug-out well now, " - "performance and stability could be impacted" - "[CPU %u action: %lx]\n", cpu, action); + cfs_cpu_dead(cpu); } return NOTIFY_OK; @@ -1118,64 +1105,81 @@ static struct notifier_block cfs_cpu_notifier = { .notifier_call = cfs_cpu_notify, .priority = 0 }; +#endif /* !HAVE_HOTPLUG_STATE_MACHINE */ +#endif /* CONFIG_HOTPLUG_CPU */ -#endif - -void -cfs_cpu_fini(void) +void cfs_cpu_fini(void) { - if (cfs_cpt_table != NULL) + if (!IS_ERR_OR_NULL(cfs_cpt_table)) cfs_cpt_table_free(cfs_cpt_table); #ifdef CONFIG_HOTPLUG_CPU +#ifdef HAVE_HOTPLUG_STATE_MACHINE + if (lustre_cpu_online > 0) + cpuhp_remove_state_nocalls(lustre_cpu_online); + cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); +#else unregister_hotcpu_notifier(&cfs_cpu_notifier); -#endif +#endif /* !HAVE_HOTPLUG_STATE_MACHINE */ +#endif /* CONFIG_HOTPLUG_CPU */ } -int -cfs_cpu_init(void) +int cfs_cpu_init(void) { - LASSERT(cfs_cpt_table == NULL); + int ret = -EINVAL; + + LASSERT(!cfs_cpt_table); #ifdef CONFIG_HOTPLUG_CPU +#ifdef HAVE_HOTPLUG_STATE_MACHINE + ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD, + "fs/lustre/cfe:dead", NULL, + cfs_cpu_dead); + if (ret < 0) + goto failed; + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "fs/lustre/cfe:online", + cfs_cpu_online, NULL); + if (ret < 0) + goto failed; + lustre_cpu_online = ret; +#else register_hotcpu_notifier(&cfs_cpu_notifier); -#endif - get_online_cpus(); - if (*cpu_pattern != 0) { - char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL); +#endif /* !HAVE_HOTPLUG_STATE_MACHINE */ +#endif /* CONFIG_HOTPLUG_CPU */ + ret = -EINVAL; - if (cpu_pattern_dup == NULL) { - CERROR("Failed to duplicate cpu_pattern\n"); - goto failed; - } - - cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup); - kfree(cpu_pattern_dup); - if (cfs_cpt_table == NULL) { - CERROR("Failed to create cptab from pattern %s\n", + get_online_cpus(); + if (*cpu_pattern) { + cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); + if (IS_ERR(cfs_cpt_table)) { + CERROR("Failed to create cptab from pattern '%s'\n", cpu_pattern); + ret = PTR_ERR(cfs_cpt_table); goto failed; } } else { cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions); - if (cfs_cpt_table == NULL) { - CERROR("Failed to create ptable with npartitions %d\n", + if (IS_ERR(cfs_cpt_table)) { + CERROR("Failed to create cptab with npartitions %d\n", cpu_npartitions); + ret = PTR_ERR(cfs_cpt_table); goto failed; } } + put_online_cpus(); - LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n", - num_online_nodes(), num_online_cpus(), - cfs_cpt_number(cfs_cpt_table)); + LCONSOLE(0, "HW NUMA nodes: %d, HW CPU cores: %d, npartitions: %d\n", + num_online_nodes(), num_online_cpus(), + cfs_cpt_number(cfs_cpt_table)); return 0; failed: put_online_cpus(); cfs_cpu_fini(); - return -1; + return ret; } #endif