From cdd92eaf7176eb448e1960cce376e60ea21a6dc2 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Tue, 23 Jan 2024 11:46:40 -0800 Subject: [PATCH] EX-8362 scripts: improve ll_compression_scan functionality Improve ll_compression_scan script functionality without changing the compression estimates. - add a version string to the output to allow tracking - handle pathnames with spaces in them - handle the lz4fast compression type - allow running on MacOS for testing Test-Parameters: trivial testlist=sanity-compr env=ONLY=1007 Signed-off-by: Andreas Dilger Change-Id: I0b8442a2590fdb9c718b1404cba1d73c26cff03c Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53678 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Patrick Farrell --- lustre/scripts/ll_compression_scan | 78 +++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/lustre/scripts/ll_compression_scan b/lustre/scripts/ll_compression_scan index 9e27619..af832a1 100755 --- a/lustre/scripts/ll_compression_scan +++ b/lustre/scripts/ll_compression_scan @@ -15,7 +15,7 @@ # # This tool will sample all files up to a configured number # (defaulting to 100 files) and after that, it samples a -# configurable % of remaining files. +# configurable percentage of remaining files. # # This tools samples throughout the file, so it should # avoid problems with poor estimates for files with headers @@ -26,13 +26,14 @@ # (lzop is the command line utility for lzo compression) # Default values +version="1.2" chunk_size=65536 block_size=4096 sample_count=20 min_files=100 default_path="$(pwd)" percentage=1 -compression_type="gzip" +compression_type="gzip -" compression_level=6 whole_file="false" quiet=0 @@ -40,15 +41,15 @@ quiet=0 # Display description of script behavior description() { - echo "Recursively scan PATH " + echo "Version $version. Recursively scan PATH " echo "sampling data from the first MIN_FILES " echo "then sampling data from PERCENTAGE% of remaining files " echo "to estimate the average compression ratio using " echo "COMPRESSION_TYPE level COMPRESSION_LEVEL" echo "and a chunk size of CHUNK_SIZE bytes." echo "" - echo "This tool takes SAMPLE_COUNT samples of CHUNK_SIZE bytes from" - echo "each sampled file and compresses that data with the selected" + echo "This tool takes up to SAMPLE_COUNT samples of CHUNK_SIZE bytes" + echo "from each sampled file and compresses that data with the selected" echo "parameters to generate an estimate of the compression ratio for" echo "the full dataset." echo "" @@ -59,29 +60,29 @@ description() runtime_description() { - echo "Recursively scan '$path'," + echo "Version $version. Recursively scan '${path[@]}'," if (( percentage < 100 )); then - (( min_files > 1 )) && echo "sampling data from the first $min_files files " - echo "then sampling data from $percentage% of remaining files " + (( min_files > 1 )) && echo "sampling data from the first $min_files files" + echo "then sampling data from $percentage% of remaining files" fi - echo "to estimate the average compression ratio using " - echo "$compression_type level $compression_level " + echo "to estimate the average compression ratio using" + echo "${compression_type/ -*/} level $compression_level" echo "and a chunk size of $chunk_size bytes." echo "" echo "Run with -h to see options for these parameters." echo "" - echo "This tool takes $sample_count samples of $chunk_size bytes from " - echo "each sampled file and compresses that data with the selected " - echo "parameters to generate an estimate of the compression ratio for " + echo "This tool takes up to $sample_count samples of $chunk_size bytes" + echo "from each sampled file and compresses that data with the selected" + echo "parameters to generate an estimate of the compression ratio for" echo "the full dataset." echo "" echo "You can trade-off estimation accuracy and scan speed by adjusting" echo "the per file sample count and percentage of files to sample." echo "" - echo "This tool assumes a relatively uniform distribution of file " - echo "sizes and contents across the directory tree, and is only " - echo "intended to provide an approximate estimate of the compression " - echo "potential of a specific dataset, and does not guarantee a " + echo "This tool assumes a relatively uniform distribution of file" + echo "sizes and contents across the directory tree, and is only" + echo "intended to provide an approximate estimate of the compression" + echo "potential of a specific dataset, and does not guarantee a" echo "particular compression level." echo "" } @@ -102,7 +103,7 @@ Arguments: -p PERCENTAGE: Fraction of scanned files to process. Default: ${percentage}%. -s SAMPLE_COUNT: Maximum number of chunks to sample per file. Default: $sample_count. -c CHUNK_SIZE: Size of data chunk in kibibytes (64-4096). Default: $((chunk_size / 1024))KiB. - -z COMPRESSION_TYPE: One of gzip, lz4, lz4fast, lzo. Default: $compression_type. + -z COMPRESSION_TYPE: One of gzip, lz4, lz4fast, lzo. Default: ${compression_type/ -*/}. -l COMPRESSION_LEVEL: Compression level to use (1-9). Default: $compression_level. -w Sample whole file (override -s). With '-p 100' for a full but slow estimate. -q Skip printing of usage header. -qq to also skip runtime status update. @@ -142,14 +143,14 @@ while getopts "c:s:n:p:z:Z:l:wqh" opt; do ;; z|Z) case $OPTARG in - lzo) - compression_type=lzop + lzo*) + compression_type="lzop -" ;; - lz4fast) - compression_type="lz4 --fast" + lz4fast*) + compression_type="lz4 --fast=" ;; - gzip|lz4) - compression_type=$OPTARG + gzip*|lz4*) + compression_type="${OPTARG%:*} -" ;; *) echo "Unknown compression type: $compression_type" 1>&2 @@ -157,6 +158,7 @@ while getopts "c:s:n:p:z:Z:l:wqh" opt; do exit 1 ;; esac + [[ "$OPTARG" =~ ":" ]] && compression_level=${OPTARG#*:} ;; l) compression_level=$OPTARG @@ -179,19 +181,17 @@ if (( compression_level < 1 || compression_level > 12 )); then echo "Compression level must be between 1 and 12" 1>&2 exit 1 fi -if [[ $compression_level -gt 9 && $compression_type != "lz4" ]]; then - echo "Compression level must be between 1 and 9 (levels 10-12 are lz4 only)" 1>&2 +if [[ $compression_level -gt 9 && ! $compression_type =~ "lz4" ]]; then + echo "Compression level must be between 1 and 9 (10+ for lz4 only)" 1>&2 exit 2 fi -path_provided=false -compress="$compression_type -q -$compression_level" +compress="$compression_type$compression_level -q" shift $((OPTIND - 1)) if [[ -z "$@" ]]; then - path=$default_path + path=($default_path) else - path="$@" - path_provided=true + path=("$@") shift fi @@ -210,10 +210,12 @@ round_to_block_size() { echo $(( ((size - 1) | (block_size - 1)) + 1 )) } +export format="--format=%s" +[[ $(uname) != "Darwin" ]] || format="-f %z" # Function to process a file process_file() { local file="$1" - local file_size=$(stat --format=%s "$file") + local file_size=$(stat $format "$file") local sum_uncompressed_chunk=0 local sum_compressed_chunk=0 @@ -351,10 +353,10 @@ total_file_count=0 last=$SECONDS echo "" -if [ "$path_provided" = true ]; then - echo "Scanning $path." +if [[ "${path[@]}" != "$default_path" ]]; then + echo "Scanning '${path[@]}'." else - echo "Scanning current directory, $path." + echo "Scanning current directory, '${path[@]}'." fi echo "" echo "" @@ -382,13 +384,13 @@ while read FILE; do fi last=$SECONDS fi -done < <(find $path -type f -print) +done < <(find "${path[@]}" -type f -print) (( total_file_count == 0 )) && - echo "error: no files found in '$path' to compress" 1>&2 && + echo "error: no files found in '${path[@]}' to compress" 1>&2 && exit 10 (( total_uncompressed_size == 0 )) && - echo "error: only zero-length files found in '$path'" 1>&2 && + echo "error: only zero-length files found in '${path[@]}'" 1>&2 && exit 11 echo "" -- 1.8.3.1