From bc1286c7d7ab728d893bfede8a983c7fed225e75 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sun, 19 Nov 2023 21:50:21 -0500 Subject: [PATCH] EX-7601 osc: calculate compressed size reduction accurately Compression reduces space used if it results in allocating at least one fewer block on disk. Modify the checks in compress_chunk to reflect this, rather than using the simpler "reduce size by at least 4K" calculation. Also do not attempt to compress chunks if they are 4K or less in size, since they can't possibly get a space benefit. This improved my measured ratio on a version of the Linux kernel source data set from 1.24 to 1.56, so this is significant for datasets with many small files. (This version of the source had large incompressible files removed, to focus on smaller files. The unmodified data set would not improve as much.) Note this is still short of our estimates, so either the estimate or Lustre still needs adjustment. TBD. Signed-off-by: Patrick Farrell Change-Id: I815706914b88de4f532a674d773769aa3a64d218 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53181 Tested-by: jenkins Tested-by: Andreas Dilger Reviewed-by: Artem Blagodarenko Reviewed-by: Andreas Dilger --- lustre/obdclass/lustre_compr.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lustre/obdclass/lustre_compr.c b/lustre/obdclass/lustre_compr.c index a62c5c5..3b85182 100644 --- a/lustre/obdclass/lustre_compr.c +++ b/lustre/obdclass/lustre_compr.c @@ -220,8 +220,18 @@ int compress_chunk(const char *obd_name, struct crypto_comp *cc, { struct ll_compr_hdr *llch; unsigned int len = *out_len - sizeof(*llch); + int effective_uncompr_size; + int effective_compr_size; int rc; + /* the uncompressed data is shorter than the minimum effective + * compressed size, so don't bother compressing + */ + if (in_len <= COMP_GAP) { + *out_len = in_len; + return 0; + } + rc = crypto_comp_compress(cc, in, in_len, out + sizeof(*llch), &len); @@ 
-232,10 +242,15 @@ int compress_chunk(const char *obd_name, struct crypto_comp *cc, return 0; } - if (len + sizeof(*llch) + COMP_GAP > in_len) { + /* round the sizes up to the nearest block before comparing */ + effective_compr_size = round_up(len + sizeof(*llch), COMP_GAP); + effective_uncompr_size = round_up(in_len, COMP_GAP); + + if (effective_compr_size >= effective_uncompr_size) { CDEBUG(D_SEC, - "Compressed %u + overhead %lu > plain %u, leaving uncompressed\n", - len, sizeof(*llch) + COMP_GAP, in_len); + "Compressed %u + overhead %u > plain %u + overhead %u, leaving uncompressed\n", + len, effective_compr_size - len, in_len, + effective_uncompr_size - in_len); *out_len = in_len; return 0; } -- 1.8.3.1