From c3c36178914e4cfa96fd9b15051653a1ecec8845 Mon Sep 17 00:00:00 2001 From: Frederick Dilger Date: Thu, 3 Oct 2024 16:57:04 -0600 Subject: [PATCH] LU-17501 libcfs: adding X for cpu pattern Added 'X' pattern to cpu_pattern such that "X[1]" will remove cpu 1 regardless of what partition it is in. Using "N X" will set the default NUMA node layout before excluding. Using "X" without 'N' will set the layout based on npartitions. This is similarly true for 'C': use 'N' for a NUMA node layout, and omit 'N' for an npartitions-based layout. Signed-off-by: Frederick Dilger Change-Id: I6276f78ecab401ed106b75ab798e5f805ef19f93 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56617 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-cpt.h | 34 ++++++-- lnet/lnet/lib-cpt.c | 191 +++++++++++++++++++++++++++++++++----------- lustre/tests/conf-sanity.sh | 184 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 342 insertions(+), 67 deletions(-) diff --git a/lnet/include/lnet/lib-cpt.h b/lnet/include/lnet/lib-cpt.h index 4b40760..26a29b77 100644 --- a/lnet/include/lnet/lib-cpt.h +++ b/lnet/include/lnet/lib-cpt.h @@ -34,13 +34,20 @@ * Examples: cpu_pattern="0[0,1] 1[2,3]" * cpu_pattern="N 0[0-3] 1[4-8]" * cpu_pattern="C[0-3]" + * cpu_pattern="X[0-1]" * * The first character "N" means following numbers are NUMA ID. * * The first character "C" means the relative cores are excluded from each - * NUMA node. This allows reserving cores on each node for non-Lustre tasks, + * partition. This allows reserving cores on each node for non-Lustre tasks, * such as HA/monitors. * + * The first character "X" means that the cores in brackets are excluded + * from the CPT that they belong to. + * + * If 'N' is specified with 'C' or 'X', the default NUMA node layout is used + * rather than the default configuration using the cpu_npartitions. + * * . 
NUMA allocators, CPU affinity threads are built over CPU partitions, * instead of HW CPUs or HW nodes. * @@ -161,17 +168,28 @@ int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node); */ void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node); /** - * add all cpus in NUMA node within include range \a node to - * CPU partition \a return 1 if succesfully set selected node - * cores, otherwise return 0 + * for each NUMA node, set the relative cpus \a within + * include range from that node + */ +void cfs_set_node_core(struct cfs_cpt_table *cptab, + int include_lo, int include_hi); +/** + * for each NUMA node, unset the relative cpus \a within + * exclude range from that node + */ +void cfs_unset_node_core(struct cfs_cpt_table *cptab, + int exclude_lo, int exclude_hi); +/** + * for each cpt, add the relative cpus \a within + * include range to that cpt */ -int cfs_cpt_set_node_core(struct cfs_cpt_table *cptab, int cpt, +void cfs_set_cpt_core(struct cfs_cpt_table *cptab, int include_lo, int include_hi); /** - * remove all cpus in NUMA node within exclude range \a node to - * CPU partition \a cpt + * for each cpt, remove the relative cpus \a within + * exclude range from that cpt */ -void cfs_cpt_unset_node_core(struct cfs_cpt_table *cptab, int cpt, +void cfs_unset_cpt_core(struct cfs_cpt_table *cptab, int exclude_lo, int exclude_hi); /** * add all cpus in node mask \a mask to CPU partition \a cpt diff --git a/lnet/lnet/lib-cpt.c b/lnet/lnet/lib-cpt.c index accd3f9..e9c06cc 100644 --- a/lnet/lnet/lib-cpt.c +++ b/lnet/lnet/lib-cpt.c @@ -79,10 +79,21 @@ MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket * are NUMA node ID, number before bracket is CPU partition ID. * - * i.e: "N C[0-1]" or "C[0-1], the character 'C' means numbers in bracket are - * relative core numbers to exclude for each NUMA node, all other cores - * are included. 
As per the example, the first two cores of each NUMA node - * will be excluded, all other cores on all nodes are included. + * i.e: "N C[0-1]" or "C[0-1]", the character 'C' means numbers in bracket are + * relative core numbers to exclude, all other cores + * are included. If 'N' is specified then the core numbers are relative to + * the NUMA nodes, otherwise, the cores are relative to each partition. + * As per the first example, the first two cores of each NUMA node + * will be excluded, all other cores on all nodes are included with + * one partition per node. In the second example, the first two cores of + * each partition will be excluded, all other cores on all partitions are + * included. The partition count is specified with cpu_npartitions. + * + * i.e: "N X[0-1]" or "X[0-1]", the character 'X' means that the numbers in + * brackets are processor IDs to be excluded from the CPT that they belong + * to. If 'N' was specified it will use the default NUMA node layout, + * otherwise it uses the default configuration for the cpu_npartitions + * specified. 
* * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology * This is the default behavior if the cpu_pattern and cpu_npartitions @@ -623,8 +634,8 @@ void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) } EXPORT_SYMBOL(cfs_cpt_unset_node); -int cfs_cpt_set_node_core(struct cfs_cpt_table *cptab, int cpt, - int include_lo, int include_hi) +void cfs_set_node_core(struct cfs_cpt_table *cptab, + int include_lo, int include_hi) { const cpumask_t *mask; int node, cpu; @@ -641,16 +652,16 @@ int cfs_cpt_set_node_core(struct cfs_cpt_table *cptab, int cpt, offset = cpu; if (include_lo + offset <= cpu && include_hi + offset >= cpu) - cfs_cpt_add_cpu(cptab, cpt, cpu); + cfs_cpt_add_cpu(cptab, + cfs_cpt_of_cpu(cptab, cpu), + cpu); } } - - return 1; } -EXPORT_SYMBOL(cfs_cpt_set_node_core); +EXPORT_SYMBOL(cfs_set_node_core); -void cfs_cpt_unset_node_core(struct cfs_cpt_table *cptab, int cpt, - int exclude_lo, int exclude_hi) +void cfs_unset_node_core(struct cfs_cpt_table *cptab, + int exclude_lo, int exclude_hi) { const cpumask_t *mask; int node, cpu; @@ -667,11 +678,61 @@ void cfs_cpt_unset_node_core(struct cfs_cpt_table *cptab, int cpt, offset = cpu; if (exclude_lo + offset <= cpu && exclude_hi + offset >= cpu) + cfs_cpt_del_cpu(cptab, + cfs_cpt_of_cpu(cptab, cpu), + cpu); + } + } +} +EXPORT_SYMBOL(cfs_unset_node_core); + +void cfs_set_cpt_core(struct cfs_cpt_table *cptab, + int include_lo, int include_hi) +{ + const cpumask_t *mask; + int cpt, cpu; + int offset; + + for (cpt = 0; cpt < cptab->ctb_nparts; cpt++) { + offset = -1; + mask = cptab->ctb_parts[cpt].cpt_cpumask; + if (cpumask_empty(mask)) + continue; + + for_each_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask) { + if (offset < 0) + offset = cpu; + if (include_lo + offset <= cpu && + include_hi + offset >= cpu) + cfs_cpt_add_cpu(cptab, cpt, cpu); + } + } +} +EXPORT_SYMBOL(cfs_set_cpt_core); + +void cfs_unset_cpt_core(struct cfs_cpt_table *cptab, + int exclude_lo, int exclude_hi) +{ + const 
cpumask_t *mask; + int cpt, cpu; + int offset; + + for (cpt = 0; cpt < cptab->ctb_nparts; cpt++) { + offset = -1; + mask = cptab->ctb_parts[cpt].cpt_cpumask; + if (cpumask_empty(mask)) + continue; + + for_each_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask) { + if (offset < 0) + offset = cpu; + if (exclude_lo + offset <= cpu && + exclude_hi + offset >= cpu) cfs_cpt_del_cpu(cptab, cpt, cpu); } } } -EXPORT_SYMBOL(cfs_cpt_unset_node_core); +EXPORT_SYMBOL(cfs_unset_cpt_core); int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, const nodemask_t *mask) @@ -996,6 +1057,7 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) char *bracket; char *str; bool exclude = false; + bool relative = false; int node = 0; int ncpt = cpu_npartitions; int cpt = 0; @@ -1027,13 +1089,22 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) str = strim(str); } + if (*str == 'x' || *str == 'X') { + str++; /* skip 'X' char */ + exclude = true; + str = strim(str); + } + if (*str == 'c' || *str == 'C') { str++; /* skip 'C' char */ exclude = true; - node = -1; /* initialize all nodes to be set */ - for_each_online_node(i) + relative = true; + } + if (node && !ncpt) { + for_each_online_node(i) { if (!cpumask_empty(cpumask_of_node(i))) ncpt++; + } } else if (!ncpt) { /* scan for bracket at start of partition */ bracket = str; while ((bracket = strchr(bracket, '['))) { @@ -1057,28 +1128,41 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) goto err_free_str; } - if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ - for_each_online_node(i) { - if (cpumask_empty(cpumask_of_node(i))) - continue; + if (exclude || node < 0) { /* create a default cpu layout */ + if (node) { + for_each_online_node(i) { + if (cpumask_empty(cpumask_of_node(i))) + continue; - rc = cfs_cpt_set_node(cptab, cpt++, i); - if (!rc) { - rc = -EINVAL; - goto err_free_table; - } + rc = cfs_cpt_set_node(cptab, cpt++, i); + 
if (!rc) { + rc = -EINVAL; + goto err_free_table; + } - if (exclude) { - c = 0; - for_each_cpu(rc, cpumask_of_node(i)) - c++; - if (high == 0 || c < high) - high = c; + if (exclude) { + c = 0; + for_each_cpu(rc, cpumask_of_node(i)) + c++; + if (high == 0 || c < high) + high = c; + } } - } - if (!exclude) { - kfree(pattern_dup); - return cptab; + if (node < 0) { /* return layout for only "N" */ + kfree(pattern_dup); + return cptab; + } + } else { + cfs_cpt_table_free(cptab); /* free old table */ + cptab = cfs_cpt_table_create(ncpt); + if (!cptab) { + rc = -ENOMEM; + CERROR("Failed to allocate CPU partition table based on cpu_npartitions: rc=%d\n", + -rc); + goto err_free_str; + } + for_each_cpu(rc, *cfs_cpt_cpumask(cptab, 0)) + high++; } } @@ -1150,18 +1234,15 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) } list_for_each_entry(range, &el->el_exprs, re_link) { - if (exclude && node) { - for (cpt = 0; cpt < ncpt; cpt++) { - cfs_cpt_unset_node_core(cptab, cpt, - range->re_lo, - range->re_hi); - if (!cfs_cpt_online(cptab, cpt)) { - CERROR("All cores are excluded on partition %d\n", - cpt); - rc = -ENODEV; - goto err_free_table; - } - } + if (exclude && relative) { + if (node) + cfs_unset_node_core(cptab, + range->re_lo, + range->re_hi); + else + cfs_unset_cpt_core(cptab, + range->re_lo, + range->re_hi); continue; } @@ -1169,6 +1250,13 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) if ((i - range->re_lo) % range->re_stride) continue; + if (exclude) { + cfs_cpt_unset_cpu(cptab, + cfs_cpt_of_cpu(cptab, + i), i); + continue; + } + rc = node ? 
cfs_cpt_set_node(cptab, cpt, i) : cfs_cpt_set_cpu(cptab, cpt, i); @@ -1183,7 +1271,16 @@ static struct cfs_cpt_table *cfs_cpt_table_create_pattern(const char *pattern) cfs_expr_list_free(el); - if (!exclude && !cfs_cpt_online(cptab, cpt)) { + if (exclude || relative) { + for (cpt = 0; cpt < ncpt; cpt++) { + if (!cfs_cpt_online(cptab, cpt)) { + rc = -ENODEV; + CERROR("All cores are excluded on partition %d: rc=%d\n", + cpt, -rc); + goto err_free_table; + } + } + } else if (!exclude && !cfs_cpt_online(cptab, cpt)) { CERROR("No online CPU is found on partition %d\n", cpt); rc = -ENODEV; goto err_free_table; diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index f3289af..d993b57 100755 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -12102,50 +12102,210 @@ test_200c() { } run_test 200c "set CPU pattern using NUMA node layout" -test_200e() { +test_200d() { cleanup_200 local cpus=$(lscpu | awk '/^CPU.s.:/ {print $NF}') local nodes=$(lscpu | awk '/NUMA node.s.:/ {print $NF}') + local parts=$((cpus / 2)) local old_modopts=$MODOPTS_LIBCFS stack_trap "cleanup_200 $old_modopts" + local full_cpu_count=0 + local excluded_count=0 + + # First, get the full table + MODOPTS_LIBCFS="cpu_npartitions=$parts" + + load_modules_local libcfs + echo "full_table:" + $LCTL get_param -n cpu_partition_table + + local full_table=() + while read -r line; do + full_table+=("$line") + full_cpu_count=$((full_cpu_count + $(echo $line | wc -w) - 2)) + done < <($LCTL get_param -n cpu_partition_table) + + cleanup + + # Now, set the pattern to exclude CPU 1 + pattern="X[1]" + MODOPTS_LIBCFS="cpu_npartitions=$parts cpu_pattern=\"$pattern\"" + + load_modules_local libcfs + echo "table with CPU 1 excluded:" + grep . 
/sys/module/libcfs/parameters/cpu* + $LCTL get_param -n cpu_partition_table + + local table=() + while read -r line; do + table+=("$line") + excluded_count=$((excluded_count + $(echo $line | wc -w) - 2)) + done < <($LCTL get_param -n cpu_partition_table) + + # Check if CPU 1 is excluded + for line in "${table[@]}"; do + ! [[ "$line" =~ " 1 " ]] || + error "CPU 1 was not excluded with pattern: $pattern" + done + + # Check if only CPU 1 is excluded + (( excluded_count == full_cpu_count - 1 )) || + error "More than one CPU was excluded with pattern: $pattern" + + cleanup + + full_cpu_count=0 + excluded_count=0 + + # First, get the full table pattern="N" MODOPTS_LIBCFS="cpu_pattern=\"$pattern\"" load_modules_local libcfs echo "full_table:" $LCTL get_param -n cpu_partition_table - local full_table=$($LCTL get_param -n cpu_partition_table) - (( $(awk '/0.:/ {print NF - 3; exit}' <<< $full_table) > 0 )) || - skip "need at least 2 cores in each CPT to exclude one" + + full_table=() + while read -r line; do + full_table+=("$line") + full_cpu_count=$((full_cpu_count + $(echo $line | wc -w) - 2)) + done < <($LCTL get_param -n cpu_partition_table) cleanup - pattern="N C[0]" + # Now, set the pattern to exclude CPU 1 + pattern="N X[1]" MODOPTS_LIBCFS="cpu_pattern=\"$pattern\"" load_modules_local libcfs - echo "table:" + echo "table with CPU 1 excluded:" grep . 
/sys/module/libcfs/parameters/cpu* $LCTL get_param -n cpu_partition_table - table=$($LCTL get_param -n cpu_partition_table) + table=() + while read -r line; do + table+=("$line") + excluded_count=$((excluded_count + $(echo $line | wc -w) - 2)) + done < <($LCTL get_param -n cpu_partition_table) + + # Check if CPU 1 is excluded + cpu_1_found=false + for line in "${table[@]}"; do + [[ "$line" =~ " 1 " ]] && cpu_1_found=true + done + $cpu_1_found && error "CPU 1 was not excluded with pattern: $pattern" + + # Check if only CPU 1 is excluded + (( excluded_count == full_cpu_count - 1 )) || + error "More than one CPU was excluded with pattern: $pattern" +} +run_test 200d "set CPU pattern to exclude only CPU 1" + +test_200e() { + cleanup_200 + + local cpus=$(lscpu | awk '/^CPU.s.:/ {print $NF}') + local nodes=$(lscpu | awk '/NUMA node.s.:/ {print $NF}') + local npartitions=$((cpus / 2)) local expected local actual local excluded local partition - for (( i = 0; i < nodes; i++ )); do - expected=$(awk '/'$i'.:/ {print NF - 3; exit}' <<< $full_table) - actual=$(awk '/'$i'.:/ {print NF - 2; exit}' <<< $table) + local old_modopts=$MODOPTS_LIBCFS + stack_trap "cleanup_200 $old_modopts" + + # N C[0] + pattern="N" + MODOPTS_LIBCFS="cpu_pattern=\"$pattern\"" + + load_modules_local libcfs + echo "full_table:" + $LCTL get_param -n cpu_partition_table + + local full_table=() + while read -r line; do + full_table+=("$line") + done < <($LCTL get_param -n cpu_partition_table) + (( $($LCTL get_param -n cpu_partition_table |\ + awk '/\<0\>.*:/ {print NF - 3; exit}') > 0 )) || + skip "need at least 2 cores in each CPT to exclude one" + + cleanup + + pattern="N C[0]" + MODOPTS_LIBCFS="cpu_pattern=\"$pattern\"" + + load_modules_local libcfs + grep . 
/sys/module/libcfs/parameters/cpu* + echo "table with npartitions=$npartitions:" + $LCTL get_param -n cpu_partition_table + + local table=() + while read -r line; do + table+=("$line") + done < <($LCTL get_param -n cpu_partition_table) + + for (( i = 0; i < ${#table[@]}; i++ )); do + expected=$(echo ${full_table[$i]} | awk '{print NF - 3; exit}') + actual=$(echo ${table[$i]} | awk '{print NF - 2; exit}') + + (( actual == expected )) || + error "CPU count not $expected, found: $actual" + + excluded=$(echo ${full_table[$i]} | awk '{print $3; exit}') + partition=$(echo ${table[$i]} | awk '{print $3; exit}') + + ! [[ "$partition" =~ "$excluded" ]] || { + echo -e "layout wrong:\n$table" + error "excluded the wrong CPU with pattern: $pattern" + } + done + + cleanup + + # C[0] with npartitions + MODOPTS_LIBCFS="cpu_npartitions=$npartitions" + + load_modules_local libcfs + echo "full_table:" + $LCTL get_param -n cpu_partition_table + + full_table=() + while read -r line; do + full_table+=("$line") + done < <($LCTL get_param -n cpu_partition_table) + (( $($LCTL get_param -n cpu_partition_table |\ + awk '/\<0\>.*:/ {print NF - 3; exit}') > 0 )) || + skip "need at least 2 cores in each CPT to exclude one" + + cleanup + + pattern="C[0]" + MODOPTS_LIBCFS="cpu_pattern=\"$pattern\" cpu_npartitions=$npartitions" + + load_modules_local libcfs + grep . 
/sys/module/libcfs/parameters/cpu* + echo "table with npartitions=$npartitions:" + $LCTL get_param -n cpu_partition_table + + table=() + while read -r line; do + table+=("$line") + done < <($LCTL get_param -n cpu_partition_table) + + for (( i = 0; i < ${#table[@]}; i++ )); do + expected=$(echo ${full_table[$i]} | awk '{print NF - 3; exit}') + actual=$(echo ${table[$i]} | awk '{print NF - 2; exit}') (( actual == expected )) || error "CPU count not $expected, found: $actual" - excluded=$(awk '/'$i'.:/ {print $3; exit}' <<< $full_table) - partition=$(awk '/'$i'.:/ {print $3; exit}' <<< $table) + excluded=$(echo ${full_table[$i]} | awk '{print $3; exit}') + partition=$(echo ${table[$i]} | awk '{print $3; exit}') ! [[ "$partition" =~ "$excluded" ]] || { echo -e "layout wrong:\n$table" -- 1.8.3.1