From 111451ea3ae28f18335b0bf1539488a913886026 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Mon, 18 Sep 2023 17:12:46 -0400 Subject: [PATCH] EX-7795 scripts: add whole file to compression scan Add a mode where the compression scan script compresses the entire file, which in theory should 100% match the compression results from using CSDC and allow a test to calculate the exact space usage reduction expected by using CSDC. This is intended to be used mostly for testing. Change help documentation slightly to make clear this can also accept a path to a single file. Test-Parameters: trivial Signed-off-by: Patrick Farrell Change-Id: I606a33d686d87dd631bf5b33dc85ee8c24fe9f67 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52406 Tested-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/scripts/ll_compression_scan | 60 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/lustre/scripts/ll_compression_scan b/lustre/scripts/ll_compression_scan index 5271f39..ffd0bf9 100755 --- a/lustre/scripts/ll_compression_scan +++ b/lustre/scripts/ll_compression_scan @@ -30,16 +30,17 @@ chunk_size=65536 block_size=4096 sample_count=20 min_files=100 -default_directory="$(pwd)" +default_path="$(pwd)" percentage=1 compression_type="gzip" compression_level=6 +whole_file="false" quiet=0 # Display description of script behavior description() { - echo "Recursively scan DIRECTORY " + echo "Recursively scan PATH " echo "sampling data from the first MIN_FILES " echo "then sampling data from PERCENTAGE% of remaining files " echo "to estimate the average compression ratio using " @@ -58,7 +59,7 @@ description() runtime_description() { - echo "Recursively scan '$directory'," + echo "Recursively scan '$path'," if (( percentage < 100 )); then (( min_files > 1 )) && echo "sampling data from the first $min_files files " echo "then sampling data from $percentage% of remaining files " @@ -89,8 +90,9 @@ runtime_description() usage() { cat <<- USAGE Usage: $(basename $0) [-n MIN_FILES] [-p PERCENTAGE] [-s SAMPLE_COUNT] - [-c CHUNK_SIZE] [-z COMPRESSION_TYPE] [-l COMPRESSION_LEVEL] [-h][-q] - [DIRECTORY ...] + [-c CHUNK_SIZE] [-z COMPRESSION_TYPE] [-l COMPRESSION_LEVEL] + [-h][-w][-q] + [PATH ...] Description: $(description | fmt) @@ -102,13 +104,14 @@ Arguments: -c CHUNK_SIZE: Size of data chunk in kibibytes (64-4096). Default: $((chunk_size / 1024))KiB. -z COMPRESSION_TYPE: One of gzip, lz4, lz4fast, lzo. Default: $compression_type. -l COMPRESSION_LEVEL: Compression level to use (1-9). Default: $compression_level. + -w Sample whole file (override -s). With '-p 100' for a full but slow estimate. -q Skip printing of usage header. -qq to also skip runtime status update. -h Print this help message. USAGE } # Parse command-line options -while getopts "c:s:n:p:z:Z:l:qh" opt; do +while getopts "c:s:n:p:z:Z:l:wqh" opt; do case $opt in c) if (( OPTARG & (OPTARG - 1) )); then @@ -158,6 +161,9 @@ while getopts "c:s:n:p:z:Z:l:qh" opt; do l) compression_level=$OPTARG ;; + w) + whole_file="true" + ;; h) usage exit 0 @@ -178,14 +184,14 @@ if [[ $compression_level -gt 9 && $compression_type != "lz4" ]]; then exit 2 fi -directory_provided=false +path_provided=false compress="$compression_type -q -$compression_level" shift $((OPTIND - 1)) if [[ -z "$@" ]]; then - directory=$default_directory + path=$default_path else - directory="$@" - directory_provided=true + path="$@" + path_provided=true shift fi @@ -216,17 +222,23 @@ process_file() { return fi - - # Calculate the segment size for the file - local segment_size=$((file_size / sample_count)) - - # Limit sample_count for small file size, but have at least one chunk - if ((sample_count * chunk_size > file_size)); then + local segment_size + if [[ $whole_file == "true" ]]; then + segment_size=$chunk_size sample_count=$((file_size / chunk_size)) - if ((sample_count == 0)); then - sample_count=1 + else + # Calculate the segment size for the file + segment_size=$((file_size / sample_count)) + + # Limit sample_count for small file size, but have at least + # one chunk + if ((sample_count * chunk_size > file_size)); then + sample_count=$((file_size / chunk_size)) fi fi + if ((sample_count == 0)); then + sample_count=1 + fi # Round up the file_size to the next block (actual space usage) file_size=$(round_to_block_size file_size) @@ -326,10 +338,10 @@ total_file_count=0 last=$SECONDS echo "" -if [ "$directory_provided" = true ]; then - echo "Scanning $directory." +if [ "$path_provided" = true ]; then + echo "Scanning $path." else - echo "Scanning current directory, $directory." + echo "Scanning current directory, $path." fi echo "" echo "" @@ -357,13 +369,13 @@ while read FILE; do fi last=$SECONDS fi -done < <(find $directory -type f -print) +done < <(find $path -type f -print) (( total_file_count == 0 )) && - echo "error: no files found in '$directory' to compress" 1>&2 && + echo "error: no files found in '$path' to compress" 1>&2 && exit 10 (( total_uncompressed_size == 0 )) && - echo "error: only zero-length files found in '$directory'" 1>&2 && + echo "error: only zero-length files found in '$path'" 1>&2 && exit 11 echo "" -- 1.8.3.1