Whamcloud - gitweb
EX-8362 scripts: Improve estimated ratio
author Raphael Druon <rdruon@ddn.com>
Thu, 19 Oct 2023 15:05:25 +0000 (09:05 -0600)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 27 Oct 2023 21:43:42 +0000 (21:43 +0000)
ll_compression_scan does not take into account the size of the
sampled files, which might lead to an incorrect estimated ratio for
non-homogeneous files.

This patch takes the compression ratio estimated from the sampled data
and applies it to the entire file size, assuming the file has
the same compression ratio throughout.

Test-Parameters: trivial
Signed-off-by: Raphael Druon <rdruon@ddn.com>
Change-Id: Ic4a26460e17c666b9edf4c0d8d450a06fad5920f
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52759
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/scripts/ll_compression_scan

index ffd0bf9..9e27619 100755 (executable)
@@ -201,6 +201,8 @@ export total_uncompressed_size=0
 export total_compressed_size=0
 export total_files_scanned=0
 export total_empty_files=0
+export total_uncompressed_size_estimated=0
+export total_compressed_size_estimated=0
 
 round_to_block_size() {
        local size=$1
@@ -242,7 +244,6 @@ process_file() {
 
        # Round up the file_size to the next block (actual space usage)
        file_size=$(round_to_block_size file_size)
-
        # Accumulate the total size of files scanned (in block_size units)
        total_file_size=$((total_file_size + file_size))
 
@@ -268,14 +269,26 @@ process_file() {
 
        done
 
+       # Get current ratio for this file
+       current_ratio=$((sum_uncompressed_chunk * 100 / sum_compressed_chunk))
+       # Assume ratio will be the same for the entire file
+       estimated_compressed_file_size=$(( file_size * 100 / current_ratio))
+
        # Accumulate the total uncompressed and compressed byte counts
        total_uncompressed_size=$((total_uncompressed_size + sum_uncompressed_chunk))
        total_compressed_size=$((total_compressed_size + sum_compressed_chunk))
+
+       # Accumulate the estimated uncompressed and compressed byte counts
+       total_uncompressed_size_estimated=$((total_uncompressed_size_estimated +
+                                             file_size))
+       total_compressed_size_estimated=$((total_compressed_size_estimated +
+                                           estimated_compressed_file_size))
 }
 
-# Calculate compression ratio from compressed chunks (value > 1)
-calculate_ratio() {
-       local ratio=$((total_uncompressed_size * 100 / total_compressed_size))
+# Calculate compression ratio from estimated compressed file (value > 1)
+calculate_estimated_ratio() {
+       local ratio=$((total_uncompressed_size_estimated * 100 /
+                       total_compressed_size_estimated))
 
        printf "%u.%02u" $((ratio / 100)) $((ratio % 100))
 }
@@ -293,7 +306,7 @@ calculate_estimated_total_compressed_size()
 {
        local ratio=$1
 
-       printf "%d" $(echo "scale=0; $size_of_all_files / $compression_ratio" | bc)
+       printf "%d" $(echo "scale=0; $size_of_all_files / $compression_ratio_estimated" | bc)
 }
 
 print_size() {
@@ -363,9 +376,9 @@ while read FILE; do
               total_files_scanned % lines == 0 ||
               last + interval < SECONDS) )); then
                if ((total_files_scanned != total_file_count)); then
-                       echo -ne "${cr}Sampled $total_files_scanned/$total_file_count files so far, estimated compression ratio $(calculate_ratio)x...${lf}"
+                       echo -ne "${cr}Sampled $total_files_scanned/$total_file_count files so far, estimated compression ratio $(calculate_estimated_ratio)x...${lf}"
                else
-                       echo -ne "${cr}Sampled $total_files_scanned files so far, estimated compression ratio $(calculate_ratio)x...${lf}"
+                       echo -ne "${cr}Sampled $total_files_scanned files so far, estimated compression ratio $(calculate_estimated_ratio)x...${lf}"
                fi
                last=$SECONDS
        fi
@@ -395,8 +408,8 @@ echo "Total size of files sampled: $(print_size $total_file_size)"
 echo "Total uncompressed size of sampled data: $(print_size $total_uncompressed_size)"
 echo "Total compressed size of sampled data: $(print_size $total_compressed_size)"
 echo "Compressed size as percentage of uncompressed size: $(calculate_pct)"
-compression_ratio=$(calculate_ratio)
-echo "Compression ratio of sampled data: ${compression_ratio}x"
+compression_ratio_estimated=$(calculate_estimated_ratio)
+echo "Estimated compression ratio of sampled files: ${compression_ratio_estimated}x"
 if (( total_files_scanned < total_file_count )); then
        size_of_all_files=$((total_file_size * total_file_count / total_files_scanned))
        echo "Estimated size of all $total_file_count files: $(print_size $size_of_all_files)"