From 7e6f144739e0d4a3fdc960eb2c1d78c897d246f9 Mon Sep 17 00:00:00 2001 From: Raphael Druon Date: Thu, 19 Oct 2023 09:05:25 -0600 Subject: [PATCH] EX-8362 scripts: Improve estimated ratio ll_compression_scan does not take into account the size of the sampled files, which might lead to an incorrect estimated ratio for non-homogeneous files. This patch takes the compression ratio estimated from the sampled data and applies it to the entire file size, assuming the file will have the same compression ratio throughout. Test-Parameters: trivial Signed-off-by: Raphael Druon Change-Id: Ic4a26460e17c666b9edf4c0d8d450a06fad5920f Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52759 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/scripts/ll_compression_scan | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lustre/scripts/ll_compression_scan b/lustre/scripts/ll_compression_scan index ffd0bf9..9e27619 100755 --- a/lustre/scripts/ll_compression_scan +++ b/lustre/scripts/ll_compression_scan @@ -201,6 +201,8 @@ export total_uncompressed_size=0 export total_compressed_size=0 export total_files_scanned=0 export total_empty_files=0 +export total_uncompressed_size_estimated=0 +export total_compressed_size_estimated=0 round_to_block_size() { local size=$1 @@ -242,7 +244,6 @@ process_file() { # Round up the file_size to the next block (actual space usage) file_size=$(round_to_block_size file_size) - # Accumulate the total size of files scanned (in block_size units) total_file_size=$((total_file_size + file_size)) @@ -268,14 +269,26 @@ process_file() { done + # Get current ratio for this file + current_ratio=$((sum_uncompressed_chunk * 100 / sum_compressed_chunk)) + # Assume ratio will be the same for the entire file + estimated_compressed_file_size=$(( file_size * 100 / current_ratio)) + # Accumulate the total uncompressed and compressed byte counts total_uncompressed_size=$((total_uncompressed_size + 
sum_uncompressed_chunk)) total_compressed_size=$((total_compressed_size + sum_compressed_chunk)) + + # Accumulate the estimated uncompressed and compressed byte counts + total_uncompressed_size_estimated=$((total_uncompressed_size_estimated + + file_size)) + total_compressed_size_estimated=$((total_compressed_size_estimated + + estimated_compressed_file_size)) } -# Calculate compression ratio from compressed chunks (value > 1) -calculate_ratio() { - local ratio=$((total_uncompressed_size * 100 / total_compressed_size)) +# Calculate compression ratio from estimated compressed file (value > 1) +calculate_estimated_ratio() { + local ratio=$((total_uncompressed_size_estimated * 100 / + total_compressed_size_estimated)) printf "%u.%02u" $((ratio / 100)) $((ratio % 100)) } @@ -293,7 +306,7 @@ calculate_estimated_total_compressed_size() { local ratio=$1 - printf "%d" $(echo "scale=0; $size_of_all_files / $compression_ratio" | bc) + printf "%d" $(echo "scale=0; $size_of_all_files / $compression_ratio_estimated" | bc) } print_size() { @@ -363,9 +376,9 @@ while read FILE; do total_files_scanned % lines == 0 || last + interval < SECONDS) )); then if ((total_files_scanned != total_file_count)); then - echo -ne "${cr}Sampled $total_files_scanned/$total_file_count files so far, estimated compression ratio $(calculate_ratio)x...${lf}" + echo -ne "${cr}Sampled $total_files_scanned/$total_file_count files so far, estimated compression ratio $(calculate_estimated_ratio)x...${lf}" else - echo -ne "${cr}Sampled $total_files_scanned files so far, estimated compression ratio $(calculate_ratio)x...${lf}" + echo -ne "${cr}Sampled $total_files_scanned files so far, estimated compression ratio $(calculate_estimated_ratio)x...${lf}" fi last=$SECONDS fi @@ -395,8 +408,8 @@ echo "Total size of files sampled: $(print_size $total_file_size)" echo "Total uncompressed size of sampled data: $(print_size $total_uncompressed_size)" echo "Total compressed size of sampled data: $(print_size 
$total_compressed_size)" echo "Compressed size as percentage of uncompressed size: $(calculate_pct)" -compression_ratio=$(calculate_ratio) -echo "Compression ratio of sampled data: ${compression_ratio}x" +compression_ratio_estimated=$(calculate_estimated_ratio) +echo "Estimated compression ratio of sampled files: ${compression_ratio_estimated}x" if (( total_files_scanned < total_file_count )); then size_of_all_files=$((total_file_size * total_file_count / total_files_scanned)) echo "Estimated size of all $total_file_count files: $(print_size $size_of_all_files)" -- 1.8.3.1