From 8298a12d1a71290df86c91be9a4f451f23c63eef Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Fri, 24 Nov 2023 17:26:10 +0800 Subject: [PATCH] EX-8355 csdc: stop compressing incompressible file data The reduced_ratio (original_size / compress_reduced_size) sets the minimum fraction of each chunk that compression must save: a chunk needs to shrink by at least 1/reduced_ratio of its original size to be considered "compressible". Let compression_ratio be defined as original_size / after_compression_size; since compress_reduced_size = original_size - after_compression_size, it follows that reduced_ratio = compression_ratio / (compression_ratio - 1). The default reduced_ratio is 16, equivalent to a compression ratio of about 1.07 (16/15), i.e. compression needs to save at least one 4KB block out of each 64KB chunk. After every compress_check_bytes of data has been compressed, the file's compressibility is re-calculated from the averaged compress_reduced and compress_orig data sizes. Compression of file data stops once the file is deemed to be incompressible, and after compress_skip_bytes of data have been written uncompressed, the file compressibility check is retried. compress_reduced_ratio, compress_check_bytes and compress_skip_bytes are tunable parameters: osc.*.compress_reduced_ratio osc.*.compress_check_bytes osc.*.compress_skip_bytes Their default values are 16, 1M and 32M respectively. 
Test-Parameters: testlist=sanity-compr Signed-off-by: Bobi Jam Change-Id: I4ce3d752c67f18ba7b100c72a2bb61a91258c6e8 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53306 Tested-by: jenkins Tested-by: Andreas Dilger Reviewed-by: Artem Blagodarenko Reviewed-by: Andreas Dilger --- lustre/include/cl_object.h | 6 +- lustre/include/lustre_osc.h | 11 ++ lustre/include/obd.h | 16 +++ lustre/include/uapi/linux/lustre/lustre_user.h | 2 +- lustre/ldlm/ldlm_lib.c | 16 +++ lustre/lfsck/lfsck_layout.c | 12 +- lustre/lod/lod_object.c | 4 +- lustre/lov/lov_io.c | 4 + lustre/lov/lov_object.c | 34 +++--- lustre/mdd/mdd_object.c | 6 +- lustre/osc/lproc_osc.c | 118 ++++++++++++++++++ lustre/osc/osc_compress.c | 163 +++++++++++++++++++++++-- lustre/osc/osc_io.c | 2 + lustre/ptlrpc/wiretest.c | 2 + lustre/tests/sanity-compr.sh | 77 +++++++++++- lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 2 + 17 files changed, 444 insertions(+), 32 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index b09f67d..a579dae 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1983,7 +1983,11 @@ struct cl_io { * write is probably size expanding and doesn't have to be chunk * aligned */ - ci_size_extend_compression:1; + ci_size_extend_compression:1, + /** + * tell the lower layers the write does not compress data + */ + ci_incompressible:1; /** * How many times the read has retried before this one. * Set by the top level and consumed by the LOV. 
diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 21f8f76..5b30224 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -175,6 +175,16 @@ struct osc_thread_info { struct cl_sync_io oti_anchor; struct cl_req_attr oti_req_attr; struct lu_buf oti_ladvise_buf; + + /* compression ratio records */ + /* averaged compression original size */ + unsigned long oti_compress_orig; + /* averaged compression reduced size */ + unsigned long oti_compress_reduced; + /* written bytes of compressible file */ + unsigned long oti_compress_checked_bytes; + /* skipped bytes for compressibility checking of incompressible file */ + unsigned long oti_compress_skip_bytes; }; static inline __u64 osc_enq2ldlm_flags(__u32 enqflags) @@ -314,6 +324,7 @@ struct osc_object { const struct osc_object_operations *oo_obj_ops; bool oo_initialized; + bool oo_incompressible; }; static inline void osc_build_res_name(struct osc_object *osc, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 3ce6bbd..a766501 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -395,6 +395,22 @@ struct client_obd { unsigned int cl_compr_fast_level; enum ll_compr_type cl_compr_type_best; unsigned int cl_compr_best_level; + + /* compression ratio parameters */ + /* if a file compress_orig > (compress_reduced * compress_reduced_ratio) + * then the file is deemed to be incompressible. 
+ * reduced_ratio = original_size / compress_reduced_size + * compression_ratio = original_size / after_compression_size + * so + * reduced_ratio = compression_ratio / (compression_ratio - 1) */ + unsigned int cl_compress_reduced_ratio; + /* multiplier of (1<u.cli.cl_target_uuid.uuid) diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 0ff9729..b963b0f 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1015,6 +1015,7 @@ enum lov_comp_md_flags { LCM_FL_SYNC_PENDING = 0x3, LCM_FL_PCC_RDONLY = 0x8, LCM_FL_FLR_MASK = 0xB, + LCM_FL_INCOMPRESSIBLE = 0x10, /* 5th bit - incompressible */ }; struct lov_comp_md_v1 { @@ -3033,7 +3034,6 @@ enum ll_compr_type { #define COMPR_CHUNK_MIN_BITS 16 #define COMPR_GET_CHUNK_SIZE(log_bits) (1 << (log_bits + COMPR_CHUNK_MIN_BITS)) - /* 64 MiB - a compressed chunk can never be bigger than PTLRPC_MAX_BRW_SIZE * (which isn't easily accessed here) */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 2197ef0..bf2af6c 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -535,6 +535,22 @@ int client_obd_setup(struct obd_device *obd, struct lustre_cfg *lcfg) cli->cl_compr_fast_level = 1; cli->cl_compr_type_best = LL_COMPR_TYPE_LZ4HC; cli->cl_compr_best_level = 9; + /* + * compressible file check compressibility after every 1MiB write, + * 16 * (1 << COMPR_CHUNK_MIN_BITS) = 16 * 64KB + */ + cli->cl_compress_check_multiplier = 16; + /* + * incompressible file write skip 32MiB before re-check its + * compressiblity, 512 * 64KB + */ + cli->cl_compress_skip_multiplier = 512; + /* + * a file is deemed to be compressible when compression reduces size + * by at least 1/16 of its original size, i.e. 
shrink 4KB out of 64KB + * chunk + */ + cli->cl_compress_reduced_ratio = 16; if (connect_op == MDS_CONNECT) { cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1; diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 4255fac..a6715ee 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -1917,9 +1917,10 @@ static void lfsck_layout_update_lcm(struct lov_comp_md_v1 *lcm, __u64 end = le64_to_cpu(lcme->lcme_extent.e_end); __u32 gen = version + range; __u32 tmp_gen; - int i; __u16 count = le16_to_cpu(lcm->lcm_entry_count); __u16 flags = le16_to_cpu(lcm->lcm_flags); + __u16 flr_state = flags & LCM_FL_FLR_MASK; + int i; if (!gen) gen = 1; @@ -1928,9 +1929,12 @@ static void lfsck_layout_update_lcm(struct lov_comp_md_v1 *lcm, lcm->lcm_layout_gen = cpu_to_le32(gen); if (range) - lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING); - else if (flags == LCM_FL_NONE && le16_to_cpu(lcm->lcm_mirror_count) > 0) - lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY); + lcm->lcm_flags = cpu_to_le16((flags & ~LCM_FL_FLR_MASK) | + LCM_FL_WRITE_PENDING); + else if (flr_state == LCM_FL_NONE && + le16_to_cpu(lcm->lcm_mirror_count) > 0) + lcm->lcm_flags = cpu_to_le16((flags & ~LCM_FL_FLR_MASK) | + LCM_FL_RDONLY); if (compr->ol_compr_type != LL_COMPR_TYPE_NONE) { lcme->lcme_flags |= cpu_to_le32(LCME_FL_COMPRESS); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 575e459..1ac06ad 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3624,7 +3624,9 @@ static int lod_declare_layout_merge(const struct lu_env *env, lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count); lcm->lcm_mirror_count = cpu_to_le16(mirror_count); if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NONE) - lcm->lcm_flags = cpu_to_le32(LCM_FL_RDONLY); + lcm->lcm_flags = cpu_to_le16((le16_to_cpu(lcm->lcm_flags) & + ~LCM_FL_FLR_MASK) | + LCM_FL_RDONLY); rc = lod_striping_reload(env, lo, buf, 0); if (rc) diff --git 
a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 79f4225..d5635a7 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -633,6 +633,9 @@ static int lov_io_slice_init(struct lov_io *lio, LASSERT(obj->lo_lsm != NULL); + io->ci_incompressible = !!(obj->lo_lsm->lsm_flags & + LCM_FL_INCOMPRESSIBLE); + result = lov_io_mirror_init(lio, obj, io); if (result) GOTO(out, result); @@ -811,6 +814,7 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio, int stripe = lov_comp_stripe(sub->sub_subio_index); io->ci_compressed_file = parent->ci_compressed_file; + io->ci_incompressible = parent->ci_incompressible; switch (io->ci_type) { case CIT_SETATTR: { diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 5d46e9a..10d4987 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -1203,14 +1203,17 @@ const static struct lov_layout_operations lov_dispatch[] = { /** * Performs a double-dispatch based on the layout type of an object. */ -#define LOV_2DISPATCH_NOLOCK(obj, op, ...) \ -({ \ - struct lov_object *__obj = (obj); \ - enum lov_layout_type __llt; \ - \ - __llt = __obj->lo_type; \ - LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \ - lov_dispatch[__llt].op(__VA_ARGS__); \ +#define LOV_2DISPATCH_NOLOCK(obj, op, ...) \ +({ \ + struct lov_object *__obj = (obj); \ + enum lov_layout_type __llt; \ + typeof(lov_dispatch[0].op(__VA_ARGS__)) __result = 0; \ + \ + __llt = __obj->lo_type; \ + LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \ + if (lov_dispatch[__llt].op) \ + __result = lov_dispatch[__llt].op(__VA_ARGS__); \ + __result; \ }) /** @@ -1271,15 +1274,16 @@ static inline void lov_conf_thaw(struct lov_object *lov) #define LOV_2DISPATCH(obj, op, ...) \ LOV_2DISPATCH_MAYLOCK(obj, op, 1, __VA_ARGS__) -#define LOV_2DISPATCH_VOID(obj, op, ...) \ -do { \ - struct lov_object *__obj = (obj); \ - enum lov_layout_type __llt; \ - \ +#define LOV_2DISPATCH_VOID(obj, op, ...) 
\ +do { \ + struct lov_object *__obj = (obj); \ + enum lov_layout_type __llt; \ + \ lov_conf_freeze(__obj); \ - __llt = __obj->lo_type; \ + __llt = __obj->lo_type; \ LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \ - lov_dispatch[__llt].op(__VA_ARGS__); \ + if (lov_dispatch[__llt].op) \ + lov_dispatch[__llt].op(__VA_ARGS__); \ lov_conf_thaw(__obj); \ } while (0) diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index b30b0ce..d8e05a6 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -1803,14 +1803,16 @@ static int mdd_split_ea(struct lov_comp_md_v1 *comp_v1, __u16 mirror_id, comp_rem->lcm_entry_count = cpu_to_le32(comp_cnt - count); comp_rem->lcm_size = cpu_to_le32(lmm_size - lmm_size_vic); if (!comp_rem->lcm_mirror_count) - comp_rem->lcm_flags = cpu_to_le16(LCM_FL_NONE); + comp_rem->lcm_flags = cpu_to_le16(comp_rem->lcm_flags & + ~LCM_FL_FLR_MASK); memset(comp_vic, 0, sizeof(*comp_v1)); comp_vic->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1); comp_vic->lcm_mirror_count = 0; comp_vic->lcm_entry_count = cpu_to_le32(count); comp_vic->lcm_size = cpu_to_le32(lmm_size_vic + sizeof(*comp_vic)); - comp_vic->lcm_flags = cpu_to_le16(LCM_FL_NONE); + comp_vic->lcm_flags = cpu_to_le16(comp_vic->lcm_flags & + ~LCM_FL_FLR_MASK); comp_vic->lcm_layout_gen = 0; offset = sizeof(*comp_v1) + sizeof(*entry) * comp_cnt; diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index d07001a..9e69ee9 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -847,6 +847,121 @@ static ssize_t osc_stats_compr_seq_write(struct file *file, } LPROC_SEQ_FOPS(osc_stats_compr); +static ssize_t compress_reduced_ratio_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + + return sprintf(buf, "%u\n", obd->u.cli.cl_compress_reduced_ratio); +} +static ssize_t compress_reduced_ratio_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t 
count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + unsigned int val; + int rc; + + rc = kstrtouint(buffer, 0, &val); + if (rc) + return rc; + + if (val == 0) + return -ERANGE; + + obd->u.cli.cl_compress_reduced_ratio = val; + + return count; +} +LUSTRE_RW_ATTR(compress_reduced_ratio); + +static ssize_t compress_check_bytes_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + unsigned long val = obd->u.cli.cl_compress_check_multiplier << + COMPR_CHUNK_MIN_BITS; + char suffix = ' '; + + if (val == 0) + return sprintf(buf, "0\n"); + + if ((val & ((1 << 20) - 1)) == 0) { + val >>= 20; + suffix = 'M'; + } else if ((val & ((1 << 10) - 1)) == 0) { + val >>= 10; + suffix = 'K'; + } + + return sprintf(buf, "%lu%c\n", val, suffix); +} +static ssize_t compress_check_bytes_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + u64 val; + int rc; + + rc = string_to_size(&val, buffer, count); + if (rc < 0) + return rc; + + obd->u.cli.cl_compress_check_multiplier = val >> COMPR_CHUNK_MIN_BITS; + if (obd->u.cli.cl_compress_check_multiplier == 0) + obd->u.cli.cl_compress_check_multiplier = 1; + + return count; +} +LUSTRE_RW_ATTR(compress_check_bytes); + +static ssize_t compress_skip_bytes_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + unsigned long val = obd->u.cli.cl_compress_skip_multiplier << + COMPR_CHUNK_MIN_BITS; + char suffix = ' '; + + if (val == 0) + return sprintf(buf, "0\n"); + + if ((val & ((1 << 20) - 1)) == 0) { + val >>= 20; + suffix = 'M'; + } else if ((val & ((1 << 10) - 1)) == 0) { + val >>= 10; + suffix = 'K'; + } + + return sprintf(buf, "%lu%c\n", val, suffix); +} +static ssize_t 
compress_skip_bytes_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + u64 val; + int rc; + + rc = string_to_size(&val, buffer, count); + if (rc < 0) + return rc; + + obd->u.cli.cl_compress_skip_multiplier = val >> COMPR_CHUNK_MIN_BITS; + + return count; +} +LUSTRE_RW_ATTR(compress_skip_bytes); + LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags); LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid); LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts); @@ -1066,6 +1181,9 @@ static struct attribute *osc_attrs[] = { &lustre_attr_idle_timeout.attr, &lustre_attr_idle_connect.attr, &lustre_attr_grant_shrink.attr, + &lustre_attr_compress_reduced_ratio.attr, + &lustre_attr_compress_check_bytes.attr, + &lustre_attr_compress_skip_bytes.attr, NULL, }; diff --git a/lustre/osc/osc_compress.c b/lustre/osc/osc_compress.c index 2fa6aa3..3cfd5b1 100644 --- a/lustre/osc/osc_compress.c +++ b/lustre/osc/osc_compress.c @@ -105,17 +105,113 @@ static int fill_cpga(struct brw_page **cpga, struct brw_page **pga, return 0; } +/** + * A decaying average is a weighted average using decaying rate to determing + * the relative weight of the more recent value versus ealier ones, the + * higher the decay rate, the more heavily the most recent value is weighted. + */ +static unsigned int decaying_average(unsigned int prev, unsigned int value, + unsigned int decay_rate) +{ + if (decay_rate > 100) + decay_rate = 100; + + return (prev * (100 - decay_rate) + value * decay_rate) / 100; +} + +/** + * \param[in] src_size data bytes before processing + * \param[in] dst_size data bytes after compressing + * \param[in,out] incompressible file is regard as incompressible + * + * if @src_size == @dst_size, then data has not been compressed. 
+ */ +static void update_compression_info(struct lu_env *env, struct client_obd *cli, + unsigned int src_size, + unsigned int dst_size, + bool *incompressible) +{ + struct osc_thread_info *info = osc_env_info(env); + + /* file not compressible, update oti_compress_skip_bytes */ + if (*incompressible) { + /* don't retry compression if indicated */ + if (cli->cl_compress_skip_multiplier == 0) + return; + + info->oti_compress_skip_bytes += src_size; + + CDEBUG(D_SEC, "file not compressible, skip %lu bytes\n", + info->oti_compress_skip_bytes); + + if (info->oti_compress_skip_bytes >= + (cli->cl_compress_skip_multiplier << + COMPR_CHUNK_MIN_BITS)) { + CDEBUG(D_SEC, + "skip %lu bytes uncompressed data, start to check compressibility\n", + info->oti_compress_skip_bytes); + /* hibernation is over */ + *incompressible = false; + info->oti_compress_orig = 0; + info->oti_compress_reduced = 0; + info->oti_compress_checked_bytes = 0; + } + return; + } + + /* file is compressible, update compression ratio. */ + info->oti_compress_checked_bytes += src_size; + info->oti_compress_orig = decaying_average(info->oti_compress_orig, + src_size, 90); + info->oti_compress_reduced = decaying_average( + info->oti_compress_reduced, + (src_size > dst_size) ? + (src_size - dst_size) : 0, 90); + CDEBUG(D_SEC, "compression checked %lu bytes, average compression reduced %lu bytes of %lu raw bytes\n", + info->oti_compress_checked_bytes, info->oti_compress_reduced, + info->oti_compress_orig); + /* + * we'd process up to compress_check_bytes data then + * check the compressibility. + */ + if (info->oti_compress_checked_bytes >= + (cli->cl_compress_check_multiplier << + COMPR_CHUNK_MIN_BITS)) { + /* finished a compressibility check cycle, start a new one. 
*/ + info->oti_compress_checked_bytes = 0; + + if (info->oti_compress_orig > info->oti_compress_reduced * + cli->cl_compress_reduced_ratio) { + CDEBUG(D_SEC, + "compression reduced %lu bytes ( < %lu/%u bytes), the file is considered as incompressible\n", + info->oti_compress_reduced, + info->oti_compress_orig, + cli->cl_compress_reduced_ratio); + /* + * the file is regarded incompressible, start + * compressibility checking hibernation cycle. + */ + *incompressible = true; + /* init compress hibernation */ + info->oti_compress_skip_bytes = 0; + } + } +} + /* returns 0 on success, non-zero on failure to compress */ int compress_request(struct client_obd *cli, struct obdo *oa, struct brw_page **pga, struct brw_page ***cpga, u32 *page_count, __u64 kms) { const char *obd_name = cli->cl_import->imp_obd->obd_name; + struct lu_env *env; + __u16 refcheck; + struct osc_object *obj; struct cl_page *clpage; struct crypto_comp *cc; enum ll_compr_type type; unsigned int total_src_size = 0; - unsigned int total_dst_size = 0; + unsigned int total_compr_size = 0; unsigned int total_uncompr_size = 0; int compr_chunk_count = 0; int chunks_no_compr = 0; @@ -132,9 +228,14 @@ int compress_request(struct client_obd *cli, struct obdo *oa, int pga_i = 0; int rc = 0; int lvl; + bool incompressible; ENTRY; + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + clpage = oap2cl_page(brw_page2oap(pga[0])); lvl = clpage->cp_compr_level; type = clpage->cp_compr_type; @@ -146,6 +247,9 @@ int compress_request(struct client_obd *cli, struct obdo *oa, src_buf_bits = chunk_bits + 1; dest_buf_bits = chunk_bits + 1; + obj = brw_page2oap(pga[0])->oap_obj; + incompressible = obj->oo_incompressible; + rc = alloc_compr(cli, &type, &lvl, &cc); /* if we're unable to setup compression, alloc_compr prints a warning * but we do not fail the IO - just write data uncompressed @@ -168,9 +272,9 @@ int compress_request(struct client_obd *cli, struct obdo *oa, bool chunk_unmergeable = false; bool 
compress_this = false; bool compressed = false; - unsigned int src_size; - unsigned int dst_size; - __u64 chunk_len_bytes; + unsigned int src_size = 0; + unsigned int dst_size = 0; + __u64 chunk_len_bytes = 0; int chunk_len = 1; int chunk_start; int curr; @@ -240,6 +344,18 @@ int compress_request(struct client_obd *cli, struct obdo *oa, pga[chunk_start]->off, pg_last->off + pg_last->count, kms); if (compress_this) { + if (incompressible) { + update_compression_info(env, cli, + chunk_len_bytes, + chunk_len_bytes, + &incompressible); + if (incompressible) { + chunks_no_compr++; + GOTO(skip, compressed); + } + + } + CDEBUG(D_SEC, "compressing chunk from page [%d, %d], off [%llu, %llu]\n", chunk_start, chunk_start + chunk_len - 1, @@ -266,6 +382,11 @@ int compress_request(struct client_obd *cli, struct obdo *oa, if (!compressed) { sptlrpc_pool_put_pages(&dst, dest_buf_bits); chunks_no_compr++; + /* not compressed, update chunk_len_bytes to + * the real value when the chunk end reaches + * the kms + */ + chunk_len_bytes = src_size; GOTO(skip, compressed); } @@ -290,7 +411,7 @@ int compress_request(struct client_obd *cli, struct obdo *oa, pga_i += chunk_len; compr_chunk_count++; compr_pages_count += ((src_size - 1) >> PAGE_SHIFT) + 1; - total_dst_size += dst_size; + total_compr_size += dst_size; total_src_size += src_size; } else { chunks_no_compr++; @@ -312,9 +433,35 @@ skip: cpga_i++; uncompr_pages_count++; total_uncompr_size += pg->count; - total_dst_size += pg->count; total_src_size += pg->count; } + + /* update compression info when page aligns to chunk start */ + if (!(pg->off & (chunk_size - 1))) { + /* pages cannot fill a whole chunk nor reaches kms */ + if (!compress_this) { + update_compression_info(env, cli, + chunk_len_bytes, + chunk_len_bytes, + &incompressible); + continue; + } + /* in the case of trying to compress incompressible + * chunk, the compression info has been updated before + */ + if (incompressible) + continue; + + if (compressed) + 
update_compression_info(env, cli, + src_size, dst_size, + &incompressible); + else + update_compression_info(env, cli, + chunk_len_bytes, + chunk_len_bytes, + &incompressible); + } } *page_count = cpga_i; @@ -322,7 +469,7 @@ skip: spin_lock(&cli->cl_compr_stats_lock); cli->cl_w_pages_compr += compr_pages_count; cli->cl_w_pages_uncompr += uncompr_pages_count; - cli->cl_w_bytes_compr += total_dst_size; + cli->cl_w_bytes_compr += total_compr_size; cli->cl_w_bytes_raw += total_src_size; cli->cl_w_bytes_incompr += total_uncompr_size; cli->cl_w_chunks_compr += compr_chunk_count; @@ -339,6 +486,8 @@ out: if (rc != 0 && *cpga != NULL) free_cpga(*cpga, *page_count); + cl_env_put(env, &refcheck); + RETURN(rc); } diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 7777445..3c85ef4 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -424,6 +424,8 @@ int osc_io_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) if (capable(CAP_SYS_RESOURCE)) oio->oi_cap_sys_resource = 1; + osc->oo_incompressible = ios->cis_io->ci_incompressible; + RETURN(rc); } EXPORT_SYMBOL(osc_io_iter_init); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 42813d2..9ea4519 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -2063,6 +2063,8 @@ void lustre_assert_wire_constants(void) (long long)LCM_FL_PCC_RDONLY); LASSERTF(LCM_FL_FLR_MASK == 11, "found %lld\n", (long long)LCM_FL_FLR_MASK); + LASSERTF(LCM_FL_INCOMPRESSIBLE == 16, "found %lld\n", + (long long)LCM_FL_INCOMPRESSIBLE); /* Checks for struct lmv_mds_md_v1 */ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n", diff --git a/lustre/tests/sanity-compr.sh b/lustre/tests/sanity-compr.sh index a5b6f2a..1fb6e0b 100644 --- a/lustre/tests/sanity-compr.sh +++ b/lustre/tests/sanity-compr.sh @@ -620,6 +620,9 @@ test_1004() { # Larger than arm page size local chunksize=128 + debugsave + $LCTL set_param debug=-1 debug_mb=256 + if [[ -f $hdf.bz2 ]] && type -p bzcat >/dev/null; 
then bzcat $hdf.bz2 > $tmp_hdf elif [[ -f $hdf.bz2 ]] && type -p bunzip2 >/dev/null; then @@ -701,6 +704,7 @@ test_1004() { dd if=$source seek=4 skip=4 bs=$((PAGE_SIZE * 2 - 1024)) of=$tmp_hdf conv=notrunc || error "(14.2) dd failed" flush_and_compare $tmp_hdf $tf "(15)" + debugrestore } run_test 1004 "initial test for write updating" @@ -1265,7 +1269,8 @@ test_1020() { compression_enabled || skip "compression is disabled ($(uname -a))" - lctl set_param debug=sec debug_mb=256 + debugsave + $LCTL set_param debug=sec debug_mb=256 # Disable readahead so reads are not expanded to full chinks $LCTL set_param osc.*.rpc_stats=c @@ -1320,6 +1325,76 @@ test_1020() { } run_test 1020 "Checking compression counters" +test_1021() { + (( MDS1_VERSION >= $(version_code 2.14.0-ddn149) )) || + skip "Need MDS version at least 2.14.0-ddn149" + + test_mkdir -p $DIR/$tdir + local tf=$DIR/$tdir/$tfile + local hdf=$LUSTRE/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf + local tmp_hdf=$TMP/$tfile.hdf + + if [[ ! -f $tmp_hdf ]]; then + if [[ -f $hdf.bz2 ]] && type -p bzcat >/dev/null; then + bzcat $hdf.bz2 > $tmp_hdf + elif [[ -f $hdf.bz2 ]] && type -p bunzip2 >/dev/null; then + cp $hdf.bz2 $tmp_hdf.bz2 || error "cp $tmp_hdf.bz2" + bunzip2 $tmp_hdf.bz2 || error "bunzip2 $tmp_hdf.bz2" + else + skip_env "bunzip2 is not installed" + fi + fi + + compression_enabled || skip "compression is disabled ($(uname -a))" + + checkbytes=($($LCTL get_param -n osc.$FSNAME-OST*.compress_check_bytes)) + skipbytes=($($LCTL get_param -n osc.$FSNAME-OST*.compress_skip_bytes)) + ratio=($($LCTL get_param -n osc.$FSNAME-OST*.compress_reduced_ratio)) + stack_trap "$LCTL set_param \ + osc.$FSNAME-OST*.compress_check_bytes=$checkbytes >/dev/null" + stack_trap "$LCTL set_param \ + osc.$FSNAME-OST*.compress_skip_bytes=$skipbytes >/dev/null" + stack_trap "$LCTL set_param \ + osc.$FSNAME-OST*.compress_reduced_ratio=$ratio >/dev/null" + $LCTL set_param osc.$FSNAME-OST*.compress_check_bytes=1M >/dev/null + $LCTL set_param 
osc.$FSNAME-OST*.compress_skip_bytes=8M >/dev/null + $LCTL set_param osc.$FSNAME-OST*.compress_reduced_ratio=16 >/dev/null + + $LFS setstripe -E-1 -c1 -Z lz4:0 --compress-chunk=64 $DIR/$tdir || + error "set a compress component in $DIR/$tdir failed" + + $LCTL set_param osc.*.stats_compr=0 >/dev/null + + # write incompressible data + echo "copy incompressible file ${hdf}.bz2" + cp -a ${hdf}.bz2 $tf || error "copy ${hdf}.bz2 to $tf failed" + sync + compr1=$($LCTL get_param osc.$FSNAME-OST*.stats_compr | + awk '/write_bytes_compr:/ {sum=sum+$2} END {print sum}') + incompr1=$($LCTL get_param osc.$FSNAME-OST*.stats_compr | + awk '/write_bytes_incompr:/ {sum=sum+$2} END {print sum}') + + echo " * after bz2 data write: compr/incompr $compr1/$incompr1" + # zip file should not be compressible + (( compr1 < 4096 )) || + error "should not write much compressed data ($compr1)" + (( incompr1 > compr1 )) || + error "should write more incompressible data ($incompr1 < $compr1)" + + # write compressible data + echo "copy compressible file $tmp_hdf" + cat $tmp_hdf >> $tf || error "append $tmp_hdf to $tf failed" + sync + compr2=$($LCTL get_param osc.$FSNAME-OST*.stats_compr | + awk '/write_bytes_compr:/ {sum=sum+$2} END {print sum}') + incompr2=$($LCTL get_param osc.$FSNAME-OST*.stats_compr | + awk '/write_bytes_incompr:/ {sum=sum+$2} END {print sum}') + + echo " * after plain data write: compr/incompr $((compr2-compr1))/$((incompr2-incompr1))" + (( compr2 > 0 )) || error "should write compressed data $compr2" +} +run_test 1021 "change file compressibiblity" + test_1080() { (( MDS1_VERSION >= $(version_code 2.14.0-ddn128) )) || skip "Need MDS version at least 2.14.0-ddn128" diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 4bf0ace..a50a26e 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -992,6 +992,7 @@ check_lov_comp_md_v1(void) CHECK_VALUE(LCM_FL_SYNC_PENDING); CHECK_VALUE(LCM_FL_PCC_RDONLY); CHECK_VALUE(LCM_FL_FLR_MASK); + 
CHECK_VALUE(LCM_FL_INCOMPRESSIBLE); } static void diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index b5e2cd3..ba8cbdf 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -2098,6 +2098,8 @@ void lustre_assert_wire_constants(void) (long long)LCM_FL_PCC_RDONLY); LASSERTF(LCM_FL_FLR_MASK == 11, "found %lld\n", (long long)LCM_FL_FLR_MASK); + LASSERTF(LCM_FL_INCOMPRESSIBLE == 16, "found %lld\n", + (long long)LCM_FL_INCOMPRESSIBLE); /* Checks for struct lmv_mds_md_v1 */ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n", -- 1.8.3.1