X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=libcfs%2Flibcfs%2Flinux%2Flinux-cpu.c;h=d3d40ce14c9e392b5a5ab0cf31ee4c8635dc21e5;hp=b6852967901a888a17140081065e6ee7e9aa68be;hb=ae6fc0156d11ae730fbb284085a2050006b570c7;hpb=148a7fba3d289caf053db937c8a08f63528dada1 diff --git a/libcfs/libcfs/linux/linux-cpu.c b/libcfs/libcfs/linux/linux-cpu.c index b685296..d3d40ce 100644 --- a/libcfs/libcfs/linux/linux-cpu.c +++ b/libcfs/libcfs/linux/linux-cpu.c @@ -22,7 +22,8 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2013, Intel Corporation. + * + * Copyright (c) 2012, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -47,7 +48,8 @@ * >1 : specify number of partitions */ static int cpu_npartitions; -CFS_MODULE_PARM(cpu_npartitions, "i", int, 0444, "# of CPU partitions"); +module_param(cpu_npartitions, int, 0444); +MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); /** * modparam for setting CPU partitions patterns: @@ -58,10 +60,13 @@ CFS_MODULE_PARM(cpu_npartitions, "i", int, 0444, "# of CPU partitions"); * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket * are NUMA node ID, number before bracket is CPU partition ID. * + * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology + * * NB: If user specified cpu_pattern, cpu_npartitions will be ignored */ static char *cpu_pattern = ""; -CFS_MODULE_PARM(cpu_pattern, "s", charp, 0444, "CPU partitions pattern"); +module_param(cpu_pattern, charp, 0444); +MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); struct cfs_cpt_data { /* serialize hotplug etc */ @@ -69,45 +74,35 @@ struct cfs_cpt_data { /* reserved for hotplug */ unsigned long cpt_version; /* mutex to protect cpt_cpumask */ - struct semaphore cpt_mutex; + struct mutex cpt_mutex; /* scratch buffer for set/unset_node */ cpumask_t *cpt_cpumask; }; static struct cfs_cpt_data cpt_data; -void -cfs_cpu_core_siblings(int cpu, cpumask_t *mask) -{ - /* return cpumask of cores in the same socket */ - cpumask_copy(mask, topology_core_cpumask(cpu)); -} -EXPORT_SYMBOL(cfs_cpu_core_siblings); - /* return number of cores in the same socket of \a cpu */ int cfs_cpu_core_nsiblings(int cpu) { int num; - down(&cpt_data.cpt_mutex); + mutex_lock(&cpt_data.cpt_mutex); - cfs_cpu_core_siblings(cpu, cpt_data.cpt_cpumask); - num = cpus_weight(*cpt_data.cpt_cpumask); + cpumask_copy(cpt_data.cpt_cpumask, topology_core_cpumask(cpu)); + num = cpumask_weight(cpt_data.cpt_cpumask); - up(&cpt_data.cpt_mutex); + mutex_unlock(&cpt_data.cpt_mutex); return num; } -EXPORT_SYMBOL(cfs_cpu_core_nsiblings); /* return cpumask of HTs in the same core */ void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask) { - cpumask_copy(mask, topology_thread_cpumask(cpu)); + cpumask_copy(mask, topology_sibling_cpumask(cpu)); } -EXPORT_SYMBOL(cfs_cpu_ht_siblings); /* return number of HTs in the same core of \a cpu */ int @@ -115,12 +110,7 @@ cfs_cpu_ht_nsiblings(int cpu) { int num; - down(&cpt_data.cpt_mutex); - - cfs_cpu_ht_siblings(cpu, cpt_data.cpt_cpumask); - num = cpus_weight(*cpt_data.cpt_cpumask); - - up(&cpt_data.cpt_mutex); + num = cpumask_weight(topology_sibling_cpumask(cpu)); return num; } @@ -129,9 +119,13 @@ EXPORT_SYMBOL(cfs_cpu_ht_nsiblings); void cfs_node_to_cpumask(int node, cpumask_t *mask) { - cpumask_copy(mask, cpumask_of_node(node)); + const cpumask_t *tmp = cpumask_of_node(node); + + if (tmp != NULL) + cpumask_copy(mask, tmp); + else + cpumask_clear(mask); } -EXPORT_SYMBOL(cfs_node_to_cpumask); void cfs_cpt_table_free(struct cfs_cpt_table *cptab) @@ -242,7 +236,7 @@ cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) } tmp += rc; - for_each_cpu_mask(j, *cptab->ctb_parts[i].cpt_cpumask) { + for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { rc = snprintf(tmp, len, "%d ", j); len -= rc; if (len <= 0) { @@ -278,8 +272,8 @@ cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); return cpt == CFS_CPT_ANY ? - cpus_weight(*cptab->ctb_cpumask) : - cpus_weight(*cptab->ctb_parts[cpt].cpt_cpumask); + cpumask_weight(cptab->ctb_cpumask) : + cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask); } EXPORT_SYMBOL(cfs_cpt_weight); @@ -289,8 +283,10 @@ cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); return cpt == CFS_CPT_ANY ? - any_online_cpu(*cptab->ctb_cpumask) != NR_CPUS : - any_online_cpu(*cptab->ctb_parts[cpt].cpt_cpumask) != NR_CPUS; + cpumask_any_and(cptab->ctb_cpumask, + cpu_online_mask) < nr_cpu_ids : + cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask, + cpu_online_mask) < nr_cpu_ids; } EXPORT_SYMBOL(cfs_cpt_online); @@ -321,7 +317,7 @@ cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); - if (cpu < 0 || cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) { CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu); return 0; } @@ -334,11 +330,11 @@ cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) cptab->ctb_cpu2cpt[cpu] = cpt; - LASSERT(!cpu_isset(cpu, *cptab->ctb_cpumask)); - LASSERT(!cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask)); + LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask)); + LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - cpu_set(cpu, *cptab->ctb_cpumask); - cpu_set(cpu, *cptab->ctb_parts[cpt].cpt_cpumask); + cpumask_set_cpu(cpu, cptab->ctb_cpumask); + cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); node = cpu_to_node(cpu); @@ -362,7 +358,7 @@ cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - if (cpu < 0 || cpu >= NR_CPUS) { + if (cpu < 0 || cpu >= nr_cpu_ids) { CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu); return; } @@ -382,11 +378,11 @@ cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) return; } - LASSERT(cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask)); - LASSERT(cpu_isset(cpu, *cptab->ctb_cpumask)); + LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); + LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - cpu_clear(cpu, *cptab->ctb_parts[cpt].cpt_cpumask); - cpu_clear(cpu, *cptab->ctb_cpumask); + cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); + cpumask_clear_cpu(cpu, cptab->ctb_cpumask); cptab->ctb_cpu2cpt[cpu] = -1; node = cpu_to_node(cpu); @@ -394,22 +390,22 @@ cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)); LASSERT(node_isset(node, *cptab->ctb_nodemask)); - for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask) { + for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) { /* this CPT has other CPU belonging to this node? */ if (cpu_to_node(i) == node) break; } - if (i == NR_CPUS) + if (i >= nr_cpu_ids) node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask); - for_each_cpu_mask(i, *cptab->ctb_cpumask) { + for_each_cpu(i, cptab->ctb_cpumask) { /* this CPT-table has other CPU belonging to this node? */ if (cpu_to_node(i) == node) break; } - if (i == NR_CPUS) + if (i >= nr_cpu_ids) node_clear(node, *cptab->ctb_nodemask); return; @@ -421,13 +417,14 @@ cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) { int i; - if (cpus_weight(*mask) == 0 || any_online_cpu(*mask) == NR_CPUS) { + if (cpumask_weight(mask) == 0 || + cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { CDEBUG(D_INFO, "No online CPU is found in the CPU mask " "for CPU partition %d\n", cpt); return 0; } - for_each_cpu_mask(i, *mask) { + for_each_cpu(i, mask) { if (!cfs_cpt_set_cpu(cptab, cpt, i)) return 0; } @@ -441,7 +438,7 @@ cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) { int i; - for_each_cpu_mask(i, *mask) + for_each_cpu(i, mask) cfs_cpt_unset_cpu(cptab, cpt, i); } EXPORT_SYMBOL(cfs_cpt_unset_cpumask); @@ -458,14 +455,14 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) return 0; } - down(&cpt_data.cpt_mutex); + mutex_lock(&cpt_data.cpt_mutex); mask = cpt_data.cpt_cpumask; cfs_node_to_cpumask(node, mask); rc = cfs_cpt_set_cpumask(cptab, cpt, mask); - up(&cpt_data.cpt_mutex); + mutex_unlock(&cpt_data.cpt_mutex); return rc; } @@ -482,14 +479,14 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) return; } - down(&cpt_data.cpt_mutex); + mutex_lock(&cpt_data.cpt_mutex); mask = cpt_data.cpt_cpumask; cfs_node_to_cpumask(node, mask); cfs_cpt_unset_cpumask(cptab, cpt, mask); - up(&cpt_data.cpt_mutex); + mutex_unlock(&cpt_data.cpt_mutex); } EXPORT_SYMBOL(cfs_cpt_unset_node); @@ -531,7 +528,7 @@ cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) } for (; cpt <= last; cpt++) { - for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask) + for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) cfs_cpt_unset_cpu(cptab, cpt, i); } } @@ -592,7 +589,7 @@ EXPORT_SYMBOL(cfs_cpt_current); int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) { - LASSERT(cpu >= 0 && cpu < NR_CPUS); + LASSERT(cpu >= 0 && cpu < nr_cpu_ids); return cptab->ctb_cpu2cpt[cpu]; } @@ -616,7 +613,7 @@ cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) nodemask = cptab->ctb_parts[cpt].cpt_nodemask; } - if (any_online_cpu(*cpumask) == NR_CPUS) { + if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) { CERROR("No online CPU found in CPU partition %d, did someone " "do CPU hotplug on system? You might need to reload " "Lustre modules to keep system working well.\n", cpt); @@ -624,7 +621,7 @@ cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) } for_each_online_cpu(i) { - if (cpu_isset(i, *cpumask)) + if (cpumask_test_cpu(i, cpumask)) continue; rc = set_cpus_allowed_ptr(current, cpumask); @@ -655,14 +652,14 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, LASSERT(number > 0); - if (number >= cpus_weight(*node)) { - while (!cpus_empty(*node)) { - cpu = first_cpu(*node); + if (number >= cpumask_weight(node)) { + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); rc = cfs_cpt_set_cpu(cptab, cpt, cpu); if (!rc) return -EINVAL; - cpu_clear(cpu, *node); + cpumask_clear_cpu(cpu, node); } return 0; } @@ -675,27 +672,27 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, goto out; } - while (!cpus_empty(*node)) { - cpu = first_cpu(*node); + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); /* get cpumask for cores in the same socket */ - cfs_cpu_core_siblings(cpu, socket); - cpus_and(*socket, *socket, *node); + cpumask_copy(socket, topology_core_cpumask(cpu)); + cpumask_and(socket, socket, node); - LASSERT(!cpus_empty(*socket)); + LASSERT(!cpumask_empty(socket)); - while (!cpus_empty(*socket)) { + while (!cpumask_empty(socket)) { int i; /* get cpumask for hts in the same core */ - cfs_cpu_ht_siblings(cpu, core); - cpus_and(*core, *core, *node); + cpumask_copy(core, topology_sibling_cpumask(cpu)); + cpumask_and(core, core, node); - LASSERT(!cpus_empty(*core)); + LASSERT(!cpumask_empty(core)); - for_each_cpu_mask(i, *core) { - cpu_clear(i, *socket); - cpu_clear(i, *node); + for_each_cpu(i, core) { + cpumask_clear_cpu(i, socket); + cpumask_clear_cpu(i, node); rc = cfs_cpt_set_cpu(cptab, cpt, i); if (!rc) { @@ -706,7 +703,7 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, if (--number == 0) goto out; } - cpu = first_cpu(*socket); + cpu = cpumask_first(socket); } } @@ -810,7 +807,7 @@ cfs_cpt_table_create(int ncpt) for_each_online_node(i) { cfs_node_to_cpumask(i, mask); - while (!cpus_empty(*mask)) { + while (!cpumask_empty(mask)) { struct cfs_cpu_partition *part; int n; @@ -823,25 +820,25 @@ cfs_cpt_table_create(int ncpt) part = &cptab->ctb_parts[cpt]; - n = num - cpus_weight(*part->cpt_cpumask); + n = num - cpumask_weight(part->cpt_cpumask); LASSERT(n > 0); rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); if (rc < 0) goto failed; - LASSERT(num >= cpus_weight(*part->cpt_cpumask)); - if (num == cpus_weight(*part->cpt_cpumask)) + LASSERT(num >= cpumask_weight(part->cpt_cpumask)); + if (num == cpumask_weight(part->cpt_cpumask)) cpt++; } } if (cpt != ncpt || - num != cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { + num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { CERROR("Expect %d(%d) CPU partitions but got %d(%d), " "CPU hotplug/unplug while setting?\n", cptab->ctb_nparts, num, cpt, - cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask)); + cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); goto failed; } @@ -867,23 +864,33 @@ static struct cfs_cpt_table * cfs_cpt_table_create_pattern(char *pattern) { struct cfs_cpt_table *cptab; - char *str = pattern; + char *str; int node = 0; + int ncpt = 0; int high; - int ncpt; + int cpt; + int rc; int c; - - for (ncpt = 0;; ncpt++) { /* quick scan bracket */ - str = strchr(str, '['); - if (str == NULL) - break; - str++; - } + int i; str = cfs_trimwhite(pattern); if (*str == 'n' || *str == 'N') { pattern = str + 1; - node = 1; + if (*pattern != '\0') { + node = 1; /* numa pattern */ + + } else { /* shortcut to create CPT from NUMA & CPU topology */ + node = -1; + ncpt = num_online_nodes(); + } + } + + if (ncpt == 0) { /* scanning bracket which is mark of partition */ + for (str = pattern;; str++, ncpt++) { + str = strchr(str, '['); + if (str == NULL) + break; + } } if (ncpt == 0 || @@ -894,21 +901,34 @@ cfs_cpt_table_create_pattern(char *pattern) return NULL; } - high = node ? MAX_NUMNODES - 1 : NR_CPUS - 1; - cptab = cfs_cpt_table_alloc(ncpt); if (cptab == NULL) { CERROR("Failed to allocate cpu partition table\n"); return NULL; } + if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ + cpt = 0; + for_each_online_node(i) { + if (cpt >= ncpt) { + CERROR("CPU changed while setting CPU " + "partition table, %d/%d\n", cpt, ncpt); + goto failed; + } + + rc = cfs_cpt_set_node(cptab, cpt++, i); + if (!rc) + goto failed; + } + return cptab; + } + + high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; + for (str = cfs_trimwhite(pattern), c = 0;; c++) { struct cfs_range_expr *range; struct cfs_expr_list *el; char *bracket = strchr(str, '['); - int cpt; - int rc; - int i; int n; if (bracket == NULL) { @@ -923,7 +943,7 @@ cfs_cpt_table_create_pattern(char *pattern) break; } - if (sscanf(str, "%u%n", &cpt, &n) < 1) { + if (sscanf(str, "%d%n", &cpt, &n) < 1) { CERROR("Invalid cpu pattern %s\n", str); goto failed; } @@ -993,7 +1013,8 @@ cfs_cpt_table_create_pattern(char *pattern) static int cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; + unsigned int cpu = (unsigned long)hcpu; + bool warn; switch (action) { case CPU_DEAD: @@ -1004,9 +1025,23 @@ cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) cpt_data.cpt_version++; spin_unlock(&cpt_data.cpt_lock); default: - CWARN("Lustre: can't support CPU hotplug well now, " - "performance and stability could be impacted" - "[CPU %u notify: %lx]\n", cpu, action); + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) { + CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n", + cpu, action); + break; + } + + mutex_lock(&cpt_data.cpt_mutex); + /* if all HTs in a core are offline, it may break affinity */ + cpumask_copy(cpt_data.cpt_cpumask, + topology_sibling_cpumask(cpu)); + warn = cpumask_any_and(cpt_data.cpt_cpumask, + cpu_online_mask) >= nr_cpu_ids; + mutex_unlock(&cpt_data.cpt_mutex); + CDEBUG(warn ? D_WARNING : D_INFO, + "Lustre: can't support CPU plug-out well now, " + "performance and stability could be impacted" + "[CPU %u action: %lx]\n", cpu, action); } return NOTIFY_OK; @@ -1046,7 +1081,7 @@ cfs_cpu_init(void) } spin_lock_init(&cpt_data.cpt_lock); - sema_init(&cpt_data.cpt_mutex, 1); + mutex_init(&cpt_data.cpt_mutex); #ifdef CONFIG_HOTPLUG_CPU register_hotcpu_notifier(&cfs_cpu_notifier);