* Examples: cpu_pattern="0[0,1] 1[2,3]"
* cpu_pattern="N 0[0-3] 1[4-8]"
* cpu_pattern="C[0-3]"
+ * cpu_pattern="X[0-1]"
*
* The first character "N" means following numbers are NUMA ID.
*
* The first character "C" means the relative cores are excluded from each
- * NUMA node. This allows reserving cores on each node for non-Lustre tasks,
+ * partition. This allows reserving cores on each node for non-Lustre tasks,
* such as HA/monitors.
*
+ * The first character "X" means that the cores in brackets are excluded
+ * from the CPT that they belong to.
+ *
+ * If 'N' is specified with 'C' or 'X', the default NUMA node layout is used
+ * rather than the default configuration using the cpu_npartitions.
+ *
* . NUMA allocators, CPU affinity threads are built over CPU partitions,
* instead of HW CPUs or HW nodes.
*
*/
void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
/**
- * add all cpus in NUMA node within include range \a node to
- * CPU partition \a return 1 if succesfully set selected node
- * cores, otherwise return 0
+ * for each NUMA node, set the cpus of that node that lie within the
+ * relative range [\a include_lo, \a include_hi]
+ */
+void cfs_set_node_core(struct cfs_cpt_table *cptab,
+ int include_lo, int include_hi);
+/**
+ * for each NUMA node, unset the cpus of that node that lie within the
+ * relative range [\a exclude_lo, \a exclude_hi]
+ */
+void cfs_unset_node_core(struct cfs_cpt_table *cptab,
+ int exclude_lo, int exclude_hi);
+/**
+ * for each cpt, add the cpus of that cpt whose number, relative to the
+ * first cpu in the cpt, lies within [\a include_lo, \a include_hi]
*/
-int cfs_cpt_set_node_core(struct cfs_cpt_table *cptab, int cpt,
+void cfs_set_cpt_core(struct cfs_cpt_table *cptab,
int include_lo, int include_hi);
/**
- * remove all cpus in NUMA node within exclude range \a node to
- * CPU partition \a cpt
+ * for each cpt, remove the cpus of that cpt whose number, relative to the
+ * first cpu in the cpt, lies within [\a exclude_lo, \a exclude_hi]
*/
-void cfs_cpt_unset_node_core(struct cfs_cpt_table *cptab, int cpt,
+void cfs_unset_cpt_core(struct cfs_cpt_table *cptab,
int exclude_lo, int exclude_hi);
/**
* add all cpus in node mask \a mask to CPU partition \a cpt
* i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
* are NUMA node ID, number before bracket is CPU partition ID.
*
- * i.e: "N C[0-1]" or "C[0-1], the character 'C' means numbers in bracket are
- * relative core numbers to exclude for each NUMA node, all other cores
- * are included. As per the example, the first two cores of each NUMA node
- * will be excluded, all other cores on all nodes are included.
+ * i.e: "N C[0-1]" or "C[0-1]", the character 'C' means numbers in bracket are
+ * relative core numbers to exclude, all other cores
+ * are included. If 'N' is specified then the core numbers are relative to
+ * the NUMA nodes, otherwise, the cores are relative to each partition.
+ * As per the first example, the first two cores of each NUMA node
+ * will be excluded, all other cores on all nodes are included with
+ * one partition per node. In the second example, the first two cores of
+ * each partition will be excluded, all other cores on all partitions are
+ * included. The partition count is specified with cpu_npartitions.
+ *
+ * i.e: "N X[0-1]" or "X[0-1]", the character 'X' means that the numbers in
+ * brackets are processor IDs to be excluded from the CPT that they belong
+ * to. If 'N' was specified it will use the default NUMA node layout,
+ * otherwise it uses the default configuration for the cpu_npartitions
+ * specified.
*
* i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
* This is the default behavior if the cpu_pattern and cpu_npartitions
}
EXPORT_SYMBOL(cfs_cpt_unset_node);
-int cfs_cpt_set_node_core(struct cfs_cpt_table *cptab, int cpt,
- int include_lo, int include_hi)
+void cfs_set_node_core(struct cfs_cpt_table *cptab,
+ int include_lo, int include_hi)
{
const cpumask_t *mask;
int node, cpu;
offset = cpu;
if (include_lo + offset <= cpu &&
include_hi + offset >= cpu)
- cfs_cpt_add_cpu(cptab, cpt, cpu);
+ cfs_cpt_add_cpu(cptab,
+ cfs_cpt_of_cpu(cptab, cpu),
+ cpu);
}
}
-
- return 1;
}
-EXPORT_SYMBOL(cfs_cpt_set_node_core);
+EXPORT_SYMBOL(cfs_set_node_core);
-void cfs_cpt_unset_node_core(struct cfs_cpt_table *cptab, int cpt,
- int exclude_lo, int exclude_hi)
+void cfs_unset_node_core(struct cfs_cpt_table *cptab,
+ int exclude_lo, int exclude_hi)
{
const cpumask_t *mask;
int node, cpu;
offset = cpu;
if (exclude_lo + offset <= cpu &&
exclude_hi + offset >= cpu)
+ cfs_cpt_del_cpu(cptab,
+ cfs_cpt_of_cpu(cptab, cpu),
+ cpu);
+ }
+ }
+}
+EXPORT_SYMBOL(cfs_unset_node_core);
+
+/**
+ * Walk every CPU partition and add to it each of its cpus whose number
+ * falls inside the window [\a include_lo, \a include_hi], where the window
+ * is measured from the first cpu visited in that partition's cpumask.
+ * Partitions whose cpumask is empty are left untouched.
+ */
+void cfs_set_cpt_core(struct cfs_cpt_table *cptab,
+		      int include_lo, int include_hi)
+{
+	int part;
+
+	for (part = 0; part < cptab->ctb_nparts; part++) {
+		const cpumask_t *part_cpus = cptab->ctb_parts[part].cpt_cpumask;
+		int base = -1;
+		int cpu;
+
+		if (cpumask_empty(part_cpus))
+			continue;
+
+		for_each_cpu(cpu, part_cpus) {
+			/* the first cpu visited anchors the relative window */
+			if (base < 0)
+				base = cpu;
+
+			if (cpu < include_lo + base || cpu > include_hi + base)
+				continue;
+
+			cfs_cpt_add_cpu(cptab, part, cpu);
+		}
+	}
+}
+EXPORT_SYMBOL(cfs_set_cpt_core);
+/**
+ * For each CPU partition, remove the cpus whose number, taken relative to
+ * the first cpu visited in that partition's cpumask, lies within
+ * [\a exclude_lo, \a exclude_hi]. Partitions with an empty cpumask are
+ * skipped.
+ */
+void cfs_unset_cpt_core(struct cfs_cpt_table *cptab,
+ int exclude_lo, int exclude_hi)
+{
+ const cpumask_t *mask;
+ int cpt, cpu;
+ int offset;
+
+ for (cpt = 0; cpt < cptab->ctb_nparts; cpt++) {
+ offset = -1; /* no base cpu recorded yet for this partition */
+ mask = cptab->ctb_parts[cpt].cpt_cpumask;
+ if (cpumask_empty(mask))
+ continue; /* nothing to remove from an empty partition */
+
+ for_each_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask) {
+ if (offset < 0)
+ offset = cpu; /* first cpu visited is the base of the range */
+ if (exclude_lo + offset <= cpu &&
+ exclude_hi + offset >= cpu)
cfs_cpt_del_cpu(cptab, cpt, cpu);
}
}
}
-EXPORT_SYMBOL(cfs_cpt_unset_node_core);
+EXPORT_SYMBOL(cfs_unset_cpt_core);
int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt,
const nodemask_t *mask)
char *bracket;
char *str;
bool exclude = false;
+ bool relative = false;
int node = 0;
int ncpt = cpu_npartitions;
int cpt = 0;
str = strim(str);
}
+ if (*str == 'x' || *str == 'X') {
+ str++; /* skip 'X' char */
+ exclude = true;
+ str = strim(str);
+ }
+
if (*str == 'c' || *str == 'C') {
str++; /* skip 'C' char */
exclude = true;
- node = -1; /* initialize all nodes to be set */
- for_each_online_node(i)
+ relative = true;
+ }
+ if (node && !ncpt) {
+ for_each_online_node(i) {
if (!cpumask_empty(cpumask_of_node(i)))
ncpt++;
+ }
} else if (!ncpt) { /* scan for bracket at start of partition */
bracket = str;
while ((bracket = strchr(bracket, '['))) {
goto err_free_str;
}
- if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
- for_each_online_node(i) {
- if (cpumask_empty(cpumask_of_node(i)))
- continue;
+ if (exclude || node < 0) { /* create a default cpu layout */
+ if (node) {
+ for_each_online_node(i) {
+ if (cpumask_empty(cpumask_of_node(i)))
+ continue;
- rc = cfs_cpt_set_node(cptab, cpt++, i);
- if (!rc) {
- rc = -EINVAL;
- goto err_free_table;
- }
+ rc = cfs_cpt_set_node(cptab, cpt++, i);
+ if (!rc) {
+ rc = -EINVAL;
+ goto err_free_table;
+ }
- if (exclude) {
- c = 0;
- for_each_cpu(rc, cpumask_of_node(i))
- c++;
- if (high == 0 || c < high)
- high = c;
+ if (exclude) {
+ c = 0;
+ for_each_cpu(rc, cpumask_of_node(i))
+ c++;
+ if (high == 0 || c < high)
+ high = c;
+ }
}
- }
- if (!exclude) {
- kfree(pattern_dup);
- return cptab;
+ if (node < 0) { /* return layout for only "N" */
+ kfree(pattern_dup);
+ return cptab;
+ }
+ } else {
+ cfs_cpt_table_free(cptab); /* free old table */
+ cptab = cfs_cpt_table_create(ncpt);
+ if (!cptab) {
+ rc = -ENOMEM;
+ CERROR("Failed to allocate CPU partition table based on cpu_npartitions: rc=%d\n",
+ -rc);
+ goto err_free_str;
+ }
+ for_each_cpu(rc, *cfs_cpt_cpumask(cptab, 0))
+ high++;
}
}
}
list_for_each_entry(range, &el->el_exprs, re_link) {
- if (exclude && node) {
- for (cpt = 0; cpt < ncpt; cpt++) {
- cfs_cpt_unset_node_core(cptab, cpt,
- range->re_lo,
- range->re_hi);
- if (!cfs_cpt_online(cptab, cpt)) {
- CERROR("All cores are excluded on partition %d\n",
- cpt);
- rc = -ENODEV;
- goto err_free_table;
- }
- }
+ if (exclude && relative) {
+ if (node)
+ cfs_unset_node_core(cptab,
+ range->re_lo,
+ range->re_hi);
+ else
+ cfs_unset_cpt_core(cptab,
+ range->re_lo,
+ range->re_hi);
continue;
}
if ((i - range->re_lo) % range->re_stride)
continue;
+ if (exclude) {
+ cfs_cpt_unset_cpu(cptab,
+ cfs_cpt_of_cpu(cptab,
+ i), i);
+ continue;
+ }
+
rc = node ?
cfs_cpt_set_node(cptab, cpt, i)
: cfs_cpt_set_cpu(cptab, cpt, i);
cfs_expr_list_free(el);
- if (!exclude && !cfs_cpt_online(cptab, cpt)) {
+ if (exclude || relative) {
+ for (cpt = 0; cpt < ncpt; cpt++) {
+ if (!cfs_cpt_online(cptab, cpt)) {
+ rc = -ENODEV;
+ CERROR("All cores are excluded on partition %d: rc=%d\n",
+ cpt, -rc);
+ goto err_free_table;
+ }
+ }
+ } else if (!exclude && !cfs_cpt_online(cptab, cpt)) {
CERROR("No online CPU is found on partition %d\n", cpt);
rc = -ENODEV;
goto err_free_table;
}
run_test 200c "set CPU pattern using NUMA node layout"
-test_200e() {
+test_200d() {
+ # Verify that the "X[1]" and "N X[1]" CPU patterns remove exactly CPU 1
+ # from the CPU partition table, first on top of a cpu_npartitions
+ # layout and then on top of the default NUMA layout.
cleanup_200
local cpus=$(lscpu | awk '/^CPU.s.:/ {print $NF}')
local nodes=$(lscpu | awk '/NUMA node.s.:/ {print $NF}')
+ local parts=$((cpus / 2))
+ # CPU 1 does not exist on a single-CPU host and parts would be 0
+ (( cpus >= 2 )) || skip "need at least 2 CPUs to exclude CPU 1"
local old_modopts=$MODOPTS_LIBCFS
stack_trap "cleanup_200 $old_modopts"
+ local full_cpu_count=0
+ local excluded_count=0
+
+ # First, get the full table
+ MODOPTS_LIBCFS="cpu_npartitions=$parts"
+
+ load_modules_local libcfs
+ echo "full_table:"
+ $LCTL get_param -n cpu_partition_table
+
+ local full_table=()
+ while read -r line; do
+ full_table+=("$line")
+ full_cpu_count=$((full_cpu_count + $(echo $line | wc -w) - 2))
+ done < <($LCTL get_param -n cpu_partition_table)
+
+ cleanup
+
+ # Now, set the pattern to exclude CPU 1
+ pattern="X[1]"
+ MODOPTS_LIBCFS="cpu_npartitions=$parts cpu_pattern=\"$pattern\""
+
+ load_modules_local libcfs
+ echo "table with CPU 1 excluded:"
+ grep . /sys/module/libcfs/parameters/cpu*
+ $LCTL get_param -n cpu_partition_table
+
+ local table=()
+ while read -r line; do
+ table+=("$line")
+ excluded_count=$((excluded_count + $(echo $line | wc -w) - 2))
+ done < <($LCTL get_param -n cpu_partition_table)
+
+ # Check if CPU 1 is excluded
+ # "read" strips trailing blanks, so pad the CPU list (the text after
+ # the ':') with spaces to also catch a CPU 1 that ends the line
+ for line in "${table[@]}"; do
+ ! [[ " ${line#*:} " =~ " 1 " ]] ||
+ error "CPU 1 was not excluded with pattern: $pattern"
+ done
+
+ # Check if only CPU 1 is excluded
+ (( excluded_count == full_cpu_count - 1 )) ||
+ error "More than one CPU was excluded with pattern: $pattern"
+
+ cleanup
+
+ full_cpu_count=0
+ excluded_count=0
+
+ # First, get the full table
pattern="N"
MODOPTS_LIBCFS="cpu_pattern=\"$pattern\""
load_modules_local libcfs
echo "full_table:"
$LCTL get_param -n cpu_partition_table
- local full_table=$($LCTL get_param -n cpu_partition_table)
- (( $(awk '/0.:/ {print NF - 3; exit}' <<< $full_table) > 0 )) ||
- skip "need at least 2 cores in each CPT to exclude one"
+
+ full_table=()
+ while read -r line; do
+ full_table+=("$line")
+ full_cpu_count=$((full_cpu_count + $(echo $line | wc -w) - 2))
+ done < <($LCTL get_param -n cpu_partition_table)
cleanup
- pattern="N C[0]"
+ # Now, set the pattern to exclude CPU 1
+ pattern="N X[1]"
MODOPTS_LIBCFS="cpu_pattern=\"$pattern\""
load_modules_local libcfs
- echo "table:"
+ echo "table with CPU 1 excluded:"
grep . /sys/module/libcfs/parameters/cpu*
$LCTL get_param -n cpu_partition_table
- table=$($LCTL get_param -n cpu_partition_table)
+ table=()
+ while read -r line; do
+ table+=("$line")
+ excluded_count=$((excluded_count + $(echo $line | wc -w) - 2))
+ done < <($LCTL get_param -n cpu_partition_table)
+
+ # Check if CPU 1 is excluded (same space-padded match as above)
+ local cpu_1_found=false
+ for line in "${table[@]}"; do
+ [[ " ${line#*:} " =~ " 1 " ]] && cpu_1_found=true
+ done
+ $cpu_1_found && error "CPU 1 was not excluded with pattern: $pattern"
+
+ # Check if only CPU 1 is excluded
+ (( excluded_count == full_cpu_count - 1 )) ||
+ error "More than one CPU was excluded with pattern: $pattern"
+}
+run_test 200d "set CPU pattern to exclude only CPU 1"
+
+test_200e() {
+ cleanup_200
+
+ local cpus=$(lscpu | awk '/^CPU.s.:/ {print $NF}')
+ local nodes=$(lscpu | awk '/NUMA node.s.:/ {print $NF}')
+ local npartitions=$((cpus / 2))
local expected
local actual
local excluded
local partition
- for (( i = 0; i < nodes; i++ )); do
- expected=$(awk '/'$i'.:/ {print NF - 3; exit}' <<< $full_table)
- actual=$(awk '/'$i'.:/ {print NF - 2; exit}' <<< $table)
+ local old_modopts=$MODOPTS_LIBCFS
+ stack_trap "cleanup_200 $old_modopts"
+
+ # N C[0]
+ pattern="N"
+ MODOPTS_LIBCFS="cpu_pattern=\"$pattern\""
+
+ load_modules_local libcfs
+ echo "full_table:"
+ $LCTL get_param -n cpu_partition_table
+
+ local full_table=()
+ while read -r line; do
+ full_table+=("$line")
+ done < <($LCTL get_param -n cpu_partition_table)
+ (( $($LCTL get_param -n cpu_partition_table |\
+ awk '/\<0\>.*:/ {print NF - 3; exit}') > 0 )) ||
+ skip "need at least 2 cores in each CPT to exclude one"
+
+ cleanup
+
+ pattern="N C[0]"
+ MODOPTS_LIBCFS="cpu_pattern=\"$pattern\""
+
+ load_modules_local libcfs
+ grep . /sys/module/libcfs/parameters/cpu*
+ echo "table with npartitions=$npartitions:"
+ $LCTL get_param -n cpu_partition_table
+
+ local table=()
+ while read -r line; do
+ table+=("$line")
+ done < <($LCTL get_param -n cpu_partition_table)
+
+ for (( i = 0; i < ${#table[@]}; i++ )); do
+ expected=$(echo ${full_table[$i]} | awk '{print NF - 3; exit}')
+ actual=$(echo ${table[$i]} | awk '{print NF - 2; exit}')
+
+ (( actual == expected )) ||
+ error "CPU count not $expected, found: $actual"
+
+ excluded=$(echo ${full_table[$i]} | awk '{print $3; exit}')
+ partition=$(echo ${table[$i]} | awk '{print $3; exit}')
+
+ ! [[ "$partition" =~ "$excluded" ]] || {
+ echo -e "layout wrong:\n$table"
+ error "excluded the wrong CPU with pattern: $pattern"
+ }
+ done
+
+ cleanup
+
+ # C[0] with npartitions
+ MODOPTS_LIBCFS="cpu_npartitions=$npartitions"
+
+ load_modules_local libcfs
+ echo "full_table:"
+ $LCTL get_param -n cpu_partition_table
+
+ full_table=()
+ while read -r line; do
+ full_table+=("$line")
+ done < <($LCTL get_param -n cpu_partition_table)
+ (( $($LCTL get_param -n cpu_partition_table |\
+ awk '/\<0\>.*:/ {print NF - 3; exit}') > 0 )) ||
+ skip "need at least 2 cores in each CPT to exclude one"
+
+ cleanup
+
+ pattern="C[0]"
+ MODOPTS_LIBCFS="cpu_pattern=\"$pattern\" cpu_npartitions=$npartitions"
+
+ load_modules_local libcfs
+ grep . /sys/module/libcfs/parameters/cpu*
+ echo "table with npartitions=$npartitions:"
+ $LCTL get_param -n cpu_partition_table
+
+ table=()
+ while read -r line; do
+ table+=("$line")
+ done < <($LCTL get_param -n cpu_partition_table)
+
+ for (( i = 0; i < ${#table[@]}; i++ )); do
+ expected=$(echo ${full_table[$i]} | awk '{print NF - 3; exit}')
+ actual=$(echo ${table[$i]} | awk '{print NF - 2; exit}')
(( actual == expected )) ||
error "CPU count not $expected, found: $actual"
- excluded=$(awk '/'$i'.:/ {print $3; exit}' <<< $full_table)
- partition=$(awk '/'$i'.:/ {print $3; exit}' <<< $table)
+ excluded=$(echo ${full_table[$i]} | awk '{print $3; exit}')
+ partition=$(echo ${table[$i]} | awk '{print $3; exit}')
! [[ "$partition" =~ "$excluded" ]] || {
echo -e "layout wrong:\n$table"