From f43b9ce9afaee73ae2537cbe32994850fe2964b6 Mon Sep 17 00:00:00 2001 From: Artem Blagodarenko Date: Wed, 30 Nov 2022 14:54:57 +0000 Subject: [PATCH] EX-6127 osc: osc brw request compression This patch adds client-side compression/decompression. The client-side data compression project (CSDC) reduces storage and network utilization by leveraging the more plentiful memory and CPU resources on the local client. Data is sent compressed over the network, saved directly to storage on the server side, and decompressed back on the client side. Uncompressed data is kept in the client page cache, all while being functionally transparent to the end user and application. As an example, a test file is compressed and decompressed. The resulting file is compared with the original one. The test case shows a 2.5x compression ratio: 356K /mnt/lustre/d460.sanity/sanity.sh 884K /tmp/cmp-46ofie/decompressed_sanity.sh Compression must read the whole chunk even if the requested offset and size do not match the chunk boundaries. Modify readahead to force reading data at an offset and size that are multiples of the chunk size. 
Test-Parameters: testlist=sanity env=ONLY=460 Signed-off-by: Artem Blagodarenko Change-Id: I9b41ab815db3df9ad7bdea5fca4c093cbda8814b Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49511 Tested-by: jenkins Tested-by: Andreas Dilger Reviewed-by: Andreas Dilger --- lustre/include/cl_object.h | 10 +- lustre/include/lustre_osc.h | 4 + lustre/include/obd.h | 5 +- lustre/include/obd_support.h | 1 + lustre/include/uapi/linux/lustre/lustre_idl.h | 17 + lustre/include/uapi/linux/lustre/lustre_user.h | 5 +- lustre/llite/dir.c | 2 +- lustre/llite/file.c | 10 +- lustre/llite/rw.c | 5 +- lustre/lov/lov_io.c | 114 ++++- lustre/lov/lov_object.c | 5 + lustre/osc/Makefile.in | 4 +- lustre/osc/osc_compress.c | 556 +++++++++++++++++++++++++ lustre/osc/osc_compress.h | 38 ++ lustre/osc/osc_internal.h | 3 + lustre/osc/osc_request.c | 133 ++++-- lustre/ptlrpc/wiretest.c | 59 +++ lustre/tests/sanity.sh | 175 ++++++++ lustre/utils/wirecheck.c | 38 ++ lustre/utils/wiretest.c | 58 +++ 20 files changed, 1194 insertions(+), 48 deletions(-) create mode 100644 lustre/osc/osc_compress.c create mode 100644 lustre/osc/osc_compress.h diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 9ee68b8..000235d 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -771,8 +771,14 @@ struct cl_page { enum cl_page_type cp_type:CP_TYPE_BITS; /* 32 bits */ /* which slab kmem index this memory allocated from */ short int cp_kmem_index; /* 48 bits */ - unsigned int cp_unused1:16; /* 64 bits */ - + /** Compression type **/ + enum ll_compr_type cp_comp_type:4; + /** Compression level **/ + u8 cp_comp_level:4; + /** Compression enabled **/ + u8 cp_comp_enabled:1; + /** Chunk Size **/ + u8 cp_chunk_log_bits:7; /* 64 bits */ /** * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned * by sub-io. Protected by a VM lock. 
diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index fc1474a..31bdec4 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -545,8 +545,12 @@ struct osc_brw_async_args { int aa_requested_nob; int aa_nio_count; u32 aa_page_count; + /* Page count before a compression */ + u32 aa_ncpage_count; s32 aa_resends; struct brw_page **aa_ppga; + /* Non compressed page array */ + struct brw_page **aa_ncppga; struct client_obd *aa_cli; struct list_head aa_oaps; struct list_head aa_exts; diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 8d85830..176a8c3 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -123,11 +123,14 @@ struct brw_page { struct page *pg; u32 count; u32 flag; + /* chunk memory pointer to free */ + char *bp_cmp_chunk; + /* compression chunk size in bytes */ + u32 bp_cmp_chunk_size; /* used for encryption: difference with offset in clear text page */ u16 bp_off_diff; /* used for encryption: difference with count in clear text page */ u16 bp_count_diff; - u32 bp_padding; }; struct timeout_item { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6cef841..d22629b 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -433,6 +433,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 #define OBD_FAIL_OSC_DELAY_CANCEL 0x416 #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 +#define OBD_FAIL_OSC_WRONG_COMP_ALG 0x418 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 6f1e0b7..3e5d295 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -3775,6 +3775,23 @@ struct ladvise_hdr { struct lu_ladvise lah_advise[0]; /* advices in this header */ }; +#define LLCH_MAGIC 0xC0398E55DA7A +/* Compression chunk header */ +struct ll_compr_hdr { + __u64 
llch_magic:48; /* LLCH_MAGIC */ + __u8 llch_header_size; /* for future extensions */ + __u8 llch_exta_flags; + __u8 llch_compr_type; /* LLCH_COMP_GZIP, LLCH_COMP_LZ4, */ + __u8 llch_compr_level:4; /* per-algorithm mapped level */ + __u8 llch_flags:4; + __u8 llch_chunk_log_bits; + __u32 llch_compr_size; /* bytes of compressed data */ + __u32 llch_reserved; /* unused, initialize to 0 */ + __u32 llch_uncompr_csum; /* crc32 of raw data, or 0 */ + __u32 llch_compr_csum; /* crc32 of compressed data, or 0 */ + __u32 llch_hdr_csum; /* crc32 of magic..compr_csum, or 0 */ +}; + #if defined(__cplusplus) } #endif diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index e7c6b44..446af93 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -2930,9 +2930,12 @@ enum ll_compr_type { LL_COMPR_TYPE_LZ4 = 4, LL_COMPR_TYPE_LZ4HC = 5, LL_COMPR_TYPE_LZO = 6, - LL_COMPR_TYPE_MAX + LL_COMPR_TYPE_MAX, + LL_COMPR_TYPE_UNCHANGED }; +#define COMPR_CHUNK_MIN_BITS 16 +#define COMPR_MIN_PAGES (1 << (COMPR_CHUNK_MIN_BITS - PAGE_SHIFT)) #if defined(__cplusplus) } #endif diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 4901208..dba2477 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -1532,7 +1532,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } case IOC_MDC_LOOKUP: { - int namelen, len = 0; + int namelen, len = 0; char *buf = NULL; char *filename; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index eb66796..542bf7a 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2478,6 +2478,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct ptlrpc_request *req = NULL; struct md_op_data *op_data; int rc, lmmsize; + int namesize; ENTRY; @@ -2485,14 +2486,19 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, if (rc) RETURN(rc); + namesize = filename 
? strlen(filename) : 0; op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, - strlen(filename), lmmsize, + namesize, lmmsize, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; - rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); + if (filename) + rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); + else + rc = md_getattr(sbi->ll_md_exp, op_data, &req); + ll_finish_md_op_data(op_data); if (rc < 0) { CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n", diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index fdcea91..97167cb 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -731,11 +731,10 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, __u64 kms; struct ll_sb_info *sbi; struct ll_ra_info *ra; - - ENTRY; - ENTRY; + BUILD_BUG_ON(PAGE_SHIFT > 16); + clob = io->ci_obj; inode = vvp_object_inode(clob); sbi = ll_i2sbi(inode); diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 1723b04..0137581 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -1098,6 +1098,55 @@ static int lov_io_lock(const struct lu_env *env, const struct cl_io_slice *ios) RETURN(lov_io_call(env, cl2lov_io(env, ios), cl_io_lock)); } +/** + * Return lsme by file offset (only used for compression) + */ +static struct lov_stripe_md_entry *lov_io_lsme_at(const struct lu_env *env, + const struct cl_io_slice *ios, + const pgoff_t page_index) +{ + loff_t offset; + struct cl_fault_io *fio; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_object *lov = lio->lis_object; + struct lov_layout_composite *comp = &lov->u.composite; + int start_index = 0; + int end_index = comp->lo_entry_count - 1; + int i; + + fio = &ios->cis_io->u.ci_fault; + offset = cl_offset(ios->cis_obj, page_index); + LASSERT(lov->lo_type == LLT_COMP); + + /* This is actual file offset so nothing can cover eof. 
*/ + if (offset == LUSTRE_EOF) + return NULL; + + if (lov_is_flr(lov)) { + struct lov_mirror_entry *lre; + + LASSERT(lio->lis_mirror_index >= 0); + + lre = lov_mirror_entry(lov, lio->lis_mirror_index); + start_index = lre->lre_start; + end_index = lre->lre_end; + } + + for (i = start_index; i <= end_index; i++) { + struct lov_layout_entry *lle = lov_entry(lov, i); + + LASSERT(!lsme_is_foreign(lle->lle_lsme)); + + if ((offset >= lle->lle_extent->e_start && + offset < lle->lle_extent->e_end) || + (offset == OBD_OBJECT_EOF && + lle->lle_extent->e_end == OBD_OBJECT_EOF)) + return lle->lle_lsme; + } + + return NULL; +} + static int lov_io_start(const struct lu_env *env, const struct cl_io_slice *ios) { ENTRY; @@ -1337,6 +1386,47 @@ int lov_io_lru_reserve(const struct lu_env *env, RETURN(0); } +static void __set_page_compression(struct cl_page *page, + struct lov_stripe_md_entry *lsme) +{ + if (lsme->lsme_pattern & LOV_PATTERN_COMPRESS) { + page->cp_comp_enabled = true; + page->cp_comp_type = lsme->lsme_compr_type; + page->cp_comp_level = lsme->lsme_compr_lvl; + page->cp_chunk_log_bits = lsme->lsme_compr_chunk_log_bits; + } +} + +static void set_page_compression(struct cl_page *page, + struct lov_stripe_md_entry *lsme) +{ + if (!lsme) + return; + + CDEBUG(D_PAGE, + "compr: type %i, level %i, bits: %i, page %p\n", + lsme->lsme_compr_type, lsme->lsme_compr_lvl, + lsme->lsme_compr_chunk_log_bits, page); + + __set_page_compression(page, lsme); +} + +static void set_page_list_compression(struct cl_page_list *queue, + struct lov_stripe_md_entry *lsme) +{ + struct cl_page *page; + + if (!lsme) + return; + + cl_page_list_for_each(page, queue) { + CDEBUG(D_PAGE, "compr: type %i, level %i, bits: %i, page %p\n", + lsme->lsme_compr_type, lsme->lsme_compr_lvl, + lsme->lsme_compr_chunk_log_bits, page); + __set_page_compression(page, lsme); + } +} + /** * lov implementation of cl_operations::cio_submit() method. 
It takes a list * of pages in \a queue, splits it into per-stripe sub-lists, invokes @@ -1365,6 +1455,7 @@ static int lov_io_submit(const struct lu_env *env, bool dio = false; int index; int rc = 0; + struct lov_stripe_md_entry * lsme; ENTRY; if (page->cp_type == CPT_TRANSIENT) @@ -1375,6 +1466,9 @@ static int lov_io_submit(const struct lu_env *env, struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; page = cl_page_list_first(qin); + lsme = lov_io_lsme_at(env, ios, page->cp_page_index); + set_page_compression(page, lsme); + if (lov_page_is_empty(page)) { cl_page_list_move(&queue->c2_qout, qin, page); @@ -1400,7 +1494,8 @@ static int lov_io_submit(const struct lu_env *env, continue; cl_page_list_move(&cl2q->c2_qin, qin, page); - } + set_page_compression(page, lsme); + } } else { cl_page_list_splice(qin, &cl2q->c2_qin); } @@ -1436,13 +1531,24 @@ static int lov_io_commit_async(const struct lu_env *env, struct lov_io *lio = cl2lov_io(env, ios); struct lov_io_sub *sub; struct cl_page *page; + struct lov_stripe_md_entry *lsme; int rc = 0; ENTRY; + page = cl_page_list_first(queue); + lsme = lov_io_lsme_at(env, ios, page->cp_page_index); + if (lio->lis_nr_subios == 1) { int idx = lio->lis_single_subio_index; LASSERT(!lov_page_is_empty(cl_page_list_first(queue))); + if (lsme) { + CDEBUG(D_SEC,"compr: type %i, level %i, bits: %i\n", + lsme->lsme_compr_type, lsme->lsme_compr_lvl, + lsme->lsme_compr_chunk_log_bits); + set_page_list_compression(queue, lsme); + } + sub = lov_sub_get(env, lio, idx); LASSERT(!IS_ERR(sub)); @@ -1458,10 +1564,11 @@ static int lov_io_commit_async(const struct lu_env *env, int index; LASSERT(plist->pl_nr == 0); - page = cl_page_list_first(queue); LASSERT(!lov_page_is_empty(page)); cl_page_list_move(plist, queue, page); + lsme = lov_io_lsme_at(env, ios, page->cp_page_index); + set_page_compression(page, lsme); index = page->cp_lov_index; while (queue->pl_nr > 0) { @@ -1470,6 +1577,8 @@ static int lov_io_commit_async(const struct lu_env *env, break; 
cl_page_list_move(plist, queue, page); + lsme = lov_io_lsme_at(env, ios, page->cp_page_index); + set_page_compression(page, lsme); } if (queue->pl_nr > 0) /* still has more pages */ @@ -2034,5 +2143,4 @@ int lov_io_layout_at(struct lov_io *lio, __u64 offset) return -1; } - /** @} lov */ diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 5c4e62f..86f5a2b 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -1954,6 +1954,11 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj, return 0; } + lsme = lsm->lsm_entries[0]; + /* No support for compressed yet. */ + if (lsme->lsme_compr_type) + GOTO(out_lsm, rc = -EOPNOTSUPP); + if (!(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) { /** * If the entry count > 1 or stripe_count > 1 and the diff --git a/lustre/osc/Makefile.in b/lustre/osc/Makefile.in index 5f967e5..e197f60 100644 --- a/lustre/osc/Makefile.in +++ b/lustre/osc/Makefile.in @@ -1,6 +1,6 @@ MODULES := osc -osc-objs := osc_request.o lproc_osc.o osc_dev.o osc_object.o osc_page.o osc_lock.o osc_io.o osc_quota.o osc_cache.o +osc-objs := osc_request.o osc_compress.o lproc_osc.o osc_dev.o osc_object.o osc_page.o osc_lock.o osc_io.o osc_quota.o osc_cache.o -EXTRA_DIST = $(osc-objs:%.o=%.c) osc_internal.h +EXTRA_DIST = $(osc-objs:%.o=%.c) osc_internal.h osc_compress.h @INCLUDE_RULES@ diff --git a/lustre/osc/osc_compress.c b/lustre/osc/osc_compress.c new file mode 100644 index 0000000..e41b9b6 --- /dev/null +++ b/lustre/osc/osc_compress.c @@ -0,0 +1,556 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ + +/* + * Copyright (c) 2023, DataDirect Networks Inc, all rights reserved. + * Author: Artem Blagodarenko + */ + +#define DEBUG_SUBSYSTEM S_OSC + +#include +#include +#include "osc_internal.h" +#include "osc_compress.h" + +static void merge_chunk(struct brw_page **pga, int first, int count, + char *merged, unsigned int *size) +{ + struct brw_page *pg; + int page; + + *size = 0; + for (page = 0; page < count; page++) { + char *kaddr; + + pg = pga[first + page]; + + kaddr = kmap_atomic(pg->pg); + memcpy(merged + page * PAGE_SIZE, kaddr, + pg->count); + kunmap_atomic(kaddr); + *size += pg->count; + } +} + +static void unmerge_chunk(struct brw_page **pga, int first, int count, + char *merged, unsigned int size) +{ + struct brw_page *pg; + int page; + char *kaddr; + unsigned int left = size; + + for (page = 0; page < count; page++) { + pg = pga[first + page]; + + if (!pg) + continue; + + LASSERT(pg->count > 0); + CDEBUG(D_SEC, + "address 0x%px, offset 0x%llx, count %i, flag 0x%x\n", + page_address(pg->pg), pg->off, pg->count, + pg->flag); + kaddr = kmap_atomic(pg->pg); + memcpy(kaddr, merged + page * PAGE_SIZE, + PAGE_SIZE); + kunmap_atomic(kaddr); + + if (left < PAGE_SIZE) { + pg->count = left; + } else { + pg->count = PAGE_SIZE; + left -= PAGE_SIZE; + } + } +} + +static int alloc_comp(enum ll_compr_type *type, unsigned int lvl, + struct crypto_comp **cc) +{ + if (OBD_FAIL_CHECK(OBD_FAIL_OSC_WRONG_COMP_ALG)) + return -EIO; + + /* + * TDB: At 
some point we will probably want to allow "fast" and "best" + * to be configurable and/or dynamically selected by CPU speed. + */ + + if (*type == LL_COMPR_TYPE_BEST) + *type = LL_COMPR_TYPE_GZIP; + else if (*type == LL_COMPR_TYPE_FAST) + *type = LL_COMPR_TYPE_LZ4; + + *cc = crypto_alloc_comp(crypto_name_from_type(*type), 0, 0); + if (IS_ERR(*cc)) { + int ret = PTR_ERR(*cc); + CERROR("Cannot initialize compressor %i, error %i\n", *type, + ret); + *cc = NULL; + return ret; + } + + if (lvl != -1) + ll_crypto_comp_set_level(*cc, lvl); + + return 0; + +} +/* + * The minimum delta between compressed and plain data to + * use the compressed one. + */ +#define COMP_GAP 4096 +static int compress_chunk(const char *obd_name, struct obdo *oa, + const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem, struct cl_page *clpage, + enum ll_compr_type *applied_type) +{ + struct ll_compr_hdr *llch; + unsigned int len = *out_len - sizeof(*llch); + struct crypto_comp *cc = NULL; + enum ll_compr_type type = clpage->cp_comp_type; + int rc; + + /* + * Once compress_chunk() faced with unsupported compression + * algorithm, it replaces algorithm used for whole request. 
+ */ + if (*applied_type != LL_COMPR_TYPE_UNCHANGED) + type = *applied_type; + +again: + rc = alloc_comp(&type, clpage->cp_comp_level, &cc); + if (!rc) { + CDEBUG(D_SEC, "%s: crypto_comp allocated, type %i, level %i\n", + obd_name, type, clpage->cp_comp_level); + } else if (type == LL_COMPR_TYPE_LZO) { + CWARN("%s: LZO(%i) unsupported, left plain: rc = %d\n", + obd_name, type, rc); + *out_len = in_len; + *applied_type = 0; + return 0; + } else if (type == LL_COMPR_TYPE_FAST) { + CWARN("%s: %i unsupported, try LZO(%i): rc = %d\n", + obd_name, type, LL_COMPR_TYPE_LZO, rc); + *applied_type = type = LL_COMPR_TYPE_LZO; + goto again; + } else { + CWARN("%s: %i unsupported, try FAST(%i): rc = %d\n", + obd_name, type, LL_COMPR_TYPE_FAST, rc); + *applied_type = type = LL_COMPR_TYPE_FAST; + goto again; + } + + rc = crypto_comp_compress(cc, in, in_len, + out + sizeof(*llch), + &len); + if (rc) { + CERROR("%s: Compression error %d: inode "DFID"\n", + obd_name, rc, oa->o_parent_seq, oa->o_parent_oid, + oa->o_parent_ver); + crypto_free_comp(cc); + return 0; + } + + if (len + sizeof(*llch) + COMP_GAP > in_len) { + CDEBUG(D_SEC, "Inode "DFID", compressed %u, plain %u, leaving uncompressed\n", + oa->o_parent_seq, oa->o_parent_oid, oa->o_parent_ver, + len, in_len); + *out_len = in_len; + crypto_free_comp(cc); + return 0; + } + + llch = (struct ll_compr_hdr *)out; + llch->llch_magic = LLCH_MAGIC; + llch->llch_header_size = sizeof(*llch); + llch->llch_compr_type = type; + llch->llch_compr_level = clpage->cp_comp_level; + llch->llch_chunk_log_bits = clpage->cp_chunk_log_bits; + llch->llch_flags = 0; + llch->llch_compr_size = len; + llch->llch_compr_csum = 0; + llch->llch_uncompr_csum = 0; + llch->llch_reserved = 0; + llch->llch_hdr_csum = 0; + + *out_len = len + sizeof(*llch); + + if (cc) + crypto_free_comp(cc); + + return 1; +} + +static inline struct page *mem_to_page(void *addr) +{ + if (!is_vmalloc_addr(addr)) + return virt_to_page(addr); + + return vmalloc_to_page(addr); +} + 
+void free_cpga(struct brw_page **cpga, u32 page_count) +{ + int i; + + for (i = 0; i < page_count; i++) { + if (cpga[i] == NULL) + continue; + + if (cpga[i]->bp_cmp_chunk) + sptlrpc_enc_pool_put_buf( + &cpga[i]->bp_cmp_chunk, + cpga[i]->bp_cmp_chunk_size); + + OBD_FREE(cpga[i], sizeof(**cpga)); + } + + OBD_FREE(cpga, page_count * sizeof(*cpga)); +} + +int fill_cpga(struct brw_page **cpga, struct brw_page **pga, + char *dst, int src_from, int dst_from, size_t dst_size) +{ + int chunk_offset; + int dst_page; + int src_page; + struct brw_page *pg; + struct osc_async_page *oap; + + for (chunk_offset = 0, dst_page = dst_from, src_page = src_from; + chunk_offset < dst_size; + chunk_offset += PAGE_SIZE, dst_page++, src_page++) { + /* TDB: shoulb be a slab cache */ + OBD_ALLOC_PTR(cpga[dst_page]); + + if (cpga[dst_page] == NULL) + RETURN(-ENOMEM); + + oap = brw_page2oap(pga[dst_page]); + oap->oap_brw_flags |= OBD_BRW_COMPRESSED; + + pg = cpga[dst_page]; + pg->off = pga[src_from]->off + chunk_offset; + if ((dst_size - chunk_offset) < PAGE_SIZE) + pg->count = dst_size - chunk_offset; + else + pg->count = PAGE_SIZE; + /* + * Compressed pages, flags are lost + * Let's choose first page in chunk + * flag to set to all pages + */ + pg->flag = pga[src_from]->flag; + if (dst) + pg->pg = mem_to_page(dst + chunk_offset); + else + pg->pg = pga[src_page]->pg; + + CDEBUG(D_SEC, "off 0x%llx, flag %x, pg %p, count %u\n", + pg->off, pg->flag, pg->pg, pg->count); + } + + return 0; +} + + +int compress_request(const char *obd_name, struct obdo *oa, + struct brw_page **pga, struct brw_page ***cpga, + u32 page_count, int *pcount) +{ + int chunk_size; + int pages_in_chunk; + int pga_i; + int cpga_i = 0; + int chunk_start = 0; + void *src = NULL; + void *dst = NULL; + unsigned int src_size; + unsigned int dst_size; + void *wrkmem = NULL; + int done = 0; + int rc = 0; + int count = 0; + struct cl_page *clpage; + unsigned int applied_type = LL_COMPR_TYPE_UNCHANGED; + + ENTRY; + + clpage = 
oap2cl_page(brw_page2oap(pga[chunk_start])); + chunk_size = (1 << (clpage->cp_chunk_log_bits + COMPR_CHUNK_MIN_BITS)); + pages_in_chunk = chunk_size / PAGE_SIZE; + + OBD_ALLOC(*cpga, page_count * sizeof(**cpga)); + sptlrpc_enc_pool_get_buf(&src, + clpage->cp_chunk_log_bits + COMPR_CHUNK_MIN_BITS); + sptlrpc_enc_pool_get_buf(&wrkmem, + clpage->cp_chunk_log_bits + COMPR_CHUNK_MIN_BITS + 1); + + if (*cpga == NULL || wrkmem == NULL || src == NULL) + GOTO(out, rc = -ENOMEM); + + for (pga_i = 0; pga_i < page_count; pga_i++) { + if ((pga_i + 1 - chunk_start == pages_in_chunk) || + (pga_i == page_count - 1) || + !can_merge_pages(pga[pga_i], pga[pga_i + 1])) { + clpage = oap2cl_page(brw_page2oap(pga[chunk_start])); + /* TDB: change chunk size, reallocate src */ + CDEBUG(D_SEC, "Chunk [%i,%i], type %i, level %i\n", + chunk_start, pga_i, clpage->cp_comp_type, + clpage->cp_comp_level); + + merge_chunk(pga, chunk_start, pga_i + 1 - chunk_start, + src, &src_size); + dst_size = 2 * chunk_size; + sptlrpc_enc_pool_get_buf(&dst, + clpage->cp_chunk_log_bits + + COMPR_CHUNK_MIN_BITS + 1); + if (dst == NULL) + GOTO(out, rc = -ENOMEM); + + /* + * - applied_type == 0 if no supported algorithms + * found during the previous compress_chunk call + * - last chunk in the request is not compressed to + * preserve the right size of object + */ + if (clpage->cp_comp_type && applied_type && + (pga_i != page_count - 1)) { + done = compress_chunk(obd_name, oa, src, + src_size, dst, &dst_size, + wrkmem, clpage, + &applied_type); + CDEBUG(D_SEC, "Compressed %u, plain %u, rc %i\n", + dst_size, src_size, done); + } else { + done = 0; + dst_size = src_size; + } + + rc = fill_cpga(*cpga, pga, done ? 
dst : NULL, + chunk_start, cpga_i, dst_size); + + if (!done) { + sptlrpc_enc_pool_put_buf(&dst, + clpage->cp_chunk_log_bits + + COMPR_CHUNK_MIN_BITS + 1); + } else { + (*cpga)[cpga_i]->bp_cmp_chunk = dst; + (*cpga)[cpga_i]->bp_cmp_chunk_size = + clpage->cp_chunk_log_bits + + COMPR_CHUNK_MIN_BITS + 1; + } + + if (rc) + GOTO(out, rc); + + cpga_i += ((dst_size - 1) >> PAGE_SHIFT) + 1; + count++; + chunk_start = pga_i + 1; + } + } + + CDEBUG(D_SEC, "Compressed content: %i pages (%i chunks)\n", cpga_i, + count); + *pcount = cpga_i; +out: + if (wrkmem != NULL) + sptlrpc_enc_pool_put_buf(&wrkmem, + clpage->cp_chunk_log_bits + COMPR_CHUNK_MIN_BITS + 1); + + if (src != NULL) + sptlrpc_enc_pool_put_buf(&src, + clpage->cp_chunk_log_bits + COMPR_CHUNK_MIN_BITS); + + if (rc != 0 && *cpga != NULL) { + free_cpga(*cpga, page_count); + } + RETURN(rc); +} + +int is_chunk_start(struct page *page, struct ll_compr_hdr **ret_header) +{ + struct ll_compr_hdr *header; + int rc = 1; + ENTRY; + + if (page == NULL) + RETURN(0); + + header = (struct ll_compr_hdr *)kmap_atomic(page); + + if (header->llch_magic != LLCH_MAGIC) + rc = 0; + *ret_header = header; + + kunmap_atomic(header); + + RETURN(rc); +} + +static int decompress_chunk(struct osc_brw_async_args *aa, + unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + enum ll_compr_type type, unsigned int lvl) +{ + int rc = 0; + struct crypto_comp *cc = NULL; + struct obdo *oa = aa->aa_oa; + char *obd_name = aa->aa_cli->cl_import->imp_obd->obd_name; + + rc = alloc_comp(&type, lvl, &cc); + if (rc) { + CERROR("%s: Unsupported compression type %i: rc = %d\n", + obd_name, type, rc); + goto fail; + } + + rc = crypto_comp_decompress(cc, in, in_len, out, out_len); + if (rc) { + CERROR("%s: Compression error : rc = %d\n", obd_name, rc); + goto fail; + } + +fail: + if (rc) + CERROR("%s: inode "DFID"\n", obd_name, oa->o_parent_seq, + oa->o_parent_oid, oa->o_parent_ver); + if (cc) + crypto_free_comp(cc); + return rc; +} 
+ +int decompress_request(struct osc_brw_async_args *aa, int page_count) +{ + enum {CS_BEGIN, CS_OUTSIDE, CS_INSIDE} state = CS_BEGIN; + bool is_start = 0; + struct ll_compr_hdr *tllch = NULL, *llch = NULL; + int chunk_start = 0; + struct brw_page **pga = aa->aa_ppga; + char *src = NULL; + char *dst = NULL; + unsigned int dst_size; + int pages_in_chunk = 0; + int chunk_size = 0; + int chunk_bits = 0; + int rc = 0; + int done = 0; + int count = 0; + int i = 0; + unsigned int src_size; + ENTRY; + + for (i = 0; i <= page_count; i++) { + + if (i < page_count) + is_start = is_chunk_start(pga[i]->pg, &tllch); + + switch (state) { + case CS_BEGIN: + chunk_start = 0; + count = 0; + fallthrough; + case CS_OUTSIDE: + if ((i == page_count) || !is_start) { + state = CS_OUTSIDE; + break; + } + chunk_start = i; + state = CS_INSIDE; + llch = tllch; + chunk_bits = llch->llch_chunk_log_bits + + COMPR_CHUNK_MIN_BITS; + chunk_size = 1 << chunk_bits; + pages_in_chunk = chunk_size / PAGE_SIZE; + if (!src) { /* get chunk size once */ + chunk_bits = llch->llch_chunk_log_bits + + COMPR_CHUNK_MIN_BITS; + chunk_size = 1 << chunk_bits; + pages_in_chunk = chunk_size / PAGE_SIZE; + } + CDEBUG(D_SEC, "chunk_size: %i, pages_in_chunk: %i\n", + chunk_size, pages_in_chunk); + break; + case CS_INSIDE: + LASSERT(pages_in_chunk != 0); + if ((i < page_count && + (i - chunk_start) < pages_in_chunk && + !is_start) || llch == NULL) + break; + if (!src) { + LASSERT(chunk_size != 0); + sptlrpc_enc_pool_get_buf((void **)&src, + chunk_bits + 1); + sptlrpc_enc_pool_get_buf((void **)&dst, + chunk_bits + 1); + if (src == NULL || dst == NULL) + GOTO(out, rc = -ENOMEM); + } + + CDEBUG(D_SEC, "Merge chunk start %i, i: %i, src: %px\n", + chunk_start, i, src); + merge_chunk(pga, chunk_start, i - chunk_start, src, + &src_size); + LASSERT(src_size <= chunk_size); + dst_size = 2 * chunk_size; + CDEBUG(D_SEC, "Compressed size %lu, type %i\n", + llch->llch_compr_size + sizeof(*llch), + llch->llch_compr_type); + + rc = 
decompress_chunk(aa, + src + llch->llch_header_size, + llch->llch_compr_size, + dst, &dst_size, + llch->llch_compr_type, + llch->llch_compr_level); + if (rc) + GOTO(out, rc); + + CDEBUG(D_SEC, "Decompressed size %u, status %i\n", + dst_size, done); + + LASSERT(dst_size <= chunk_size); + unmerge_chunk(pga, chunk_start, i - chunk_start, + dst, dst_size); + + count++; + i--; + state = CS_OUTSIDE; + break; + default: + GOTO(out, rc = -EIO); + } + + } + CDEBUG(D_SEC, "Decompressed %i pages (%i chunks)\n", page_count, count); +out: + if (src != NULL) + sptlrpc_enc_pool_put_buf(&src, chunk_bits + 1); + + if (dst != NULL) + sptlrpc_enc_pool_put_buf(&dst, chunk_bits + 1); + + RETURN(rc); +} diff --git a/lustre/osc/osc_compress.h b/lustre/osc/osc_compress.h new file mode 100644 index 0000000..80c7010 --- /dev/null +++ b/lustre/osc/osc_compress.h @@ -0,0 +1,38 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ + +/* + * Copyright (c) 2023, DataDirect Networks Inc, all rights reserved. 
+ * Author: Artem Blagodarenko + */ + +#ifndef OSC_COMPRESS_H +#define OSC_COMPRESS_H + +int compress_request(const char *obd_name, struct obdo *oa, + struct brw_page **pga, struct brw_page ***cpga, + u32 page_count, int *pcount); + +int decompress_request(struct osc_brw_async_args *aa, int page_count); + +void free_cpga( struct brw_page **cpga, u32 page_count); + +#endif /* OSC_COMPRESS_H */ diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index a57234f..f5f2e6a 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -180,6 +180,9 @@ extern unsigned long osc_cache_shrink_count(struct shrinker *sk, struct shrink_control *sc); extern unsigned long osc_cache_shrink_scan(struct shrinker *sk, struct shrink_control *sc); + +int can_merge_pages(struct brw_page *p1, struct brw_page *p2); + static inline unsigned int osc_max_write_chunks(const struct client_obd *cli) { /* diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index bac102b..cc4e75d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -49,6 +49,7 @@ #include #include #include "osc_internal.h" +#include "osc_compress.h" #include atomic_t osc_pool_req_count; @@ -1155,25 +1156,25 @@ static int check_write_rcs(struct ptlrpc_request *req, return (0); } -static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) +int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { - if (p1->flag != p2->flag) { + if (p1->flag != p2->flag) { unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | OBD_BRW_SYNC | OBD_BRW_ASYNC | OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC | OBD_BRW_SYS_RESOURCE); - /* warn if we try to combine flags that we don't know to be - * safe to combine */ - if (unlikely((p1->flag & mask) != (p2->flag & mask))) { - CWARN("Saw flags 0x%x and 0x%x in the same brw, please " - "report this at https://jira.whamcloud.com/\n", - p1->flag, p2->flag); - } - return 0; - } + /* warn if we try to combine flags that we don't know to be + 
* safe to combine */ + if (unlikely((p1->flag & mask) != (p2->flag & mask))) { + CWARN("Saw flags 0x%x and 0x%x in the same brw, please " + "report this at https://jira.whamcloud.com/\n", + p1->flag, p2->flag); + } + return 0; + } - return (p1->off + p1->count == p2->off); + return (p1->off + p1->count == p2->off); } #if IS_ENABLED(CONFIG_CRC_T10DIF) @@ -1498,9 +1499,17 @@ static inline void osc_release_bounce_pages(struct brw_page **pga, static int osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa, - u32 page_count, struct brw_page **pga, + u32 *pcount, struct brw_page ***orig_pga, struct ptlrpc_request **reqp, int resend) { + /* This values will be replaced */ + struct brw_page **pga = *orig_pga; + u32 page_count = *pcount; + + /* Save original to store in the aa */ + struct brw_page **ncpga = *orig_pga; + u32 ncpcount = *pcount; + struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; struct ost_body *body; @@ -1516,6 +1525,8 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa, bool directio = false; bool enable_checksum = true; struct cl_page *clpage; + /* TDB: Get from settings */ + int compressed = 1; ENTRY; if (pga[0]->pg) { @@ -1541,6 +1552,41 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa, if (req == NULL) RETURN(-ENOMEM); + if (opc == OST_WRITE && compressed) { + if (inode && IS_ENCRYPTED(inode)) { + CWARN("%s: Encrypted, can not be compressed: "DFID"\n", + obd_name, oa->o_parent_seq, oa->o_parent_oid, + oa->o_parent_ver); + compressed = 0; + goto skip_compression; + } + + /* + * If compression disabled for the file -1 is set to + * all pages, so it is enough to check only one + * */ + if ((oap2cl_page(brw_page2oap(pga[0])))->cp_comp_enabled + == false) { + compressed = 0; + goto skip_compression; + } + + rc = compress_request(obd_name, oa, pga, &pga, page_count, + &page_count); + if (rc) { + /* + * TDB: disable for a file if e.g. 
512KB+ of + * incompressible chunks are hit in a row for + * the same file + */ + page_count = *pcount; + compressed = 0; + } + *pcount = page_count; + *orig_pga = pga; + } + +skip_compression: if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode) && llcrypt_has_encryption_key(inode)) { struct page **pa = NULL; @@ -1809,21 +1855,14 @@ no_bulk: int poff = pg->off & ~PAGE_MASK; LASSERT(pg->count > 0); - /* make sure there is no gap in the middle of page array */ - LASSERTF(page_count == 1 || - (ergo(i == 0, poff + pg->count == PAGE_SIZE) && - ergo(i > 0 && i < page_count - 1, - poff == 0 && pg->count == PAGE_SIZE) && - ergo(i == page_count - 1, poff == 0)), - "i: %d/%d pg: %p off: %llu, count: %u\n", - i, page_count, pg, pg->off, pg->count); - LASSERTF(i == 0 || pg->off > pg_prev->off, + /* gap in the middle of page array can be with compression */ + LASSERTF(i == 0 || pg->off > pg_prev->off, "i %d p_c %u pg %p [pri %lu ind %lu] off %llu" " prev_pg %p [pri %lu ind %lu] off %llu\n", - i, page_count, - pg->pg, page_private(pg->pg), pg->pg->index, pg->off, - pg_prev->pg, page_private(pg_prev->pg), - pg_prev->pg->index, pg_prev->off); + i, page_count, + pg->pg, page_private(pg->pg), pg->pg->index, pg->off, + pg_prev->pg, page_private(pg_prev->pg), + pg_prev->pg->index, pg_prev->off); LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) == (pg->flag & OBD_BRW_SRVLOCK)); if (short_io_size != 0 && opc == OST_WRITE) { @@ -1928,6 +1967,11 @@ no_bulk: ptlrpc_request_set_replen(req); aa = ptlrpc_req_async_args(aa, req); + if (opc == OST_WRITE && compressed) { + aa->aa_ncpage_count = ncpcount; + aa->aa_ncppga = ncpga; + } + aa->aa_oa = oa; aa->aa_requested_nob = requested_nob; aa->aa_nio_count = niocount; @@ -2111,6 +2155,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) struct inode *inode = NULL; unsigned int blockbits = 0, blocksize = 0; struct cl_page *clpage; + int rc2; ENTRY; @@ -2376,6 +2421,13 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int 
rc) } } + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_READ) { + rc2 = decompress_request(aa, aa->aa_page_count); + if (rc2) { + CERROR("%s: Request can't be decompressed: rc = %d\n", + obd_name, rc2); + } + } out: if (rc >= 0) lustre_get_wire_obdo(&req->rq_import->imp_connect_data, @@ -2398,8 +2450,8 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, - aa->aa_cli, aa->aa_oa, aa->aa_page_count, - aa->aa_ppga, &new_req, 1); + aa->aa_cli, aa->aa_oa, &aa->aa_page_count, + &aa->aa_ppga, &new_req, 1); if (rc) RETURN(rc); @@ -2539,8 +2591,11 @@ static int brw_interpret(const struct lu_env *env, unsigned long valid = 0; struct cl_object *obj; struct osc_async_page *last; + if (aa->aa_ncppga) + last = brw_page2oap(aa->aa_ncppga[aa->aa_ncpage_count - 1]); + else + last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); - last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); obj = osc2cl(last->oap_obj); cl_object_attr_lock(obj); @@ -2602,7 +2657,17 @@ static int brw_interpret(const struct lu_env *env, aa->aa_requested_nob : req->rq_bulk->bd_nob_transferred); - osc_release_ppga(aa->aa_ppga, aa->aa_page_count); + if (aa->aa_ncppga) { + osc_release_ppga(aa->aa_ncppga, aa->aa_ncpage_count); + /* + * allocated aa_ncpage_count for ppga and used + * only aa_page_count + */ + free_cpga(aa->aa_ppga, aa->aa_ncpage_count); + } else { + osc_release_ppga(aa->aa_ppga, aa->aa_page_count); + } + ptlrpc_lprocfs_brw(req, transferred); spin_lock(&cli->cl_loi_list_lock); @@ -2745,7 +2810,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, } sort_brw_pages(pga, page_count); - rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 0); + rc = osc_brw_prep_request(cmd, cli, oa, &page_count, &pga, &req, 0); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, rc); @@ -2849,6 +2914,8 @@ void osc_send_empty_rpc(struct osc_object *osc, 
pgoff_t start) struct obdo oa; struct brw_page bpg = { .off = start, .count = 1}; struct brw_page *pga = &bpg; + struct brw_page **ppga = &pga; + u32 page_count = 1; int rc; memset(&oa, 0, sizeof(oa)); @@ -2857,8 +2924,8 @@ void osc_send_empty_rpc(struct osc_object *osc, pgoff_t start) /* For updated servers - don't do a read */ oa.o_flags = OBD_FL_NORPC; - rc = osc_brw_prep_request(OBD_BRW_READ, osc_cli(osc), &oa, 1, &pga, - &req, 0); + rc = osc_brw_prep_request(OBD_BRW_READ, osc_cli(osc), &oa, &page_count, + &ppga, &req, 0); /* If we succeeded we ship it off, if not there's no point in doing * anything. Also no resends. diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 2cc96e4..85597b7 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -45,6 +45,7 @@ #include #include + void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' @@ -778,6 +779,64 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LF_UNSET != 0x00000002); BUILD_BUG_ON(LADVISE_MAGIC != 0x1adf1ce0); + /* Checks for struct ll_compr_hdr */ + LASSERTF((int)sizeof(struct ll_compr_hdr) == 32, "found %lld\n", + (long long)(int)sizeof(struct ll_compr_hdr)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_header_size) == 6, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_header_size)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_header_size) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_header_size)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_exta_flags) == 7, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_exta_flags)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_exta_flags) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_exta_flags)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_type) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_compr_type)); 
+ LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_type) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_type)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_chunk_log_bits) == 10, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_chunk_log_bits)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_chunk_log_bits) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_chunk_log_bits)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_size) == 12, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_compr_size)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_size)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_uncompr_csum) == 20, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_uncompr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_uncompr_csum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_uncompr_csum)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_csum) == 24, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_compr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_csum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_csum)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_reserved) == 16, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_reserved)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_reserved) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_reserved)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_hdr_csum) == 28, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_hdr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_hdr_csum) == 4, "found %lld\n", + (long 
long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_hdr_csum)); + LASSERTF(LLCH_MAGIC == 211353433660026, "found %lld\n", + (long long)LLCH_MAGIC); + LASSERTF(LL_COMPR_TYPE_NONE == 0, "found %lld\n", + (long long)LL_COMPR_TYPE_NONE); + LASSERTF(LL_COMPR_TYPE_FAST == 1, "found %lld\n", + (long long)LL_COMPR_TYPE_FAST); + LASSERTF(LL_COMPR_TYPE_BEST == 2, "found %lld\n", + (long long)LL_COMPR_TYPE_BEST); + LASSERTF(LL_COMPR_TYPE_GZIP == 3, "found %lld\n", + (long long)LL_COMPR_TYPE_GZIP); + LASSERTF(LL_COMPR_TYPE_LZ4 == 4, "found %lld\n", + (long long)LL_COMPR_TYPE_LZ4); + LASSERTF(LL_COMPR_TYPE_LZ4HC == 5, "found %lld\n", + (long long)LL_COMPR_TYPE_LZ4HC); + LASSERTF(LL_COMPR_TYPE_LZO == 6, "found %lld\n", + (long long)LL_COMPR_TYPE_LZO); + LASSERTF(COMPR_CHUNK_MIN_BITS == 16, "found %lld\n", + (long long)COMPR_CHUNK_MIN_BITS); + /* Checks for struct lustre_handle */ LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n", (long long)(int)sizeof(struct lustre_handle)); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b0eb357..c92e370 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -27513,7 +27513,182 @@ test_450() { } run_test 450 "Test loading bitmaps function" +compress_type() { + local orig=$1 + local random_data=${2:-false} + local orig_short=$(basename $orig) + local stored_dir=$DIR/$tdir/ + local stored=$stored_dir/$orig_short.$t1$l1$c1 + local stored2=$TMP/$tdir/$tfile.dec_$orig_short.$t1$l1${c1}cpy + local decomp1=$TMP/$tdir/$tfile.dec_$orig_short.$t1$l1$c1 + local decomp3=$TMP/$tdir/$tfile.dec_$orig_short.$t1$l1$c1.32.1.5 + local decomp4=$TMP/$tdir/$tfile.dec_$orig_short.$t1$l1$c1.32.1.5.orig + + $LFS setstripe -E 512K -Z $t1:$l1 -E 768K -Z none -E -1 -Z $t1:$l1 \ + --compress-chunk=$c1 $stored_dir || + error "set a compress component in $stored failed" + + echo ">>>>>" + dd if=$orig of=$stored bs=65536 &>/dev/null || + error "dd to $stored failed" + + sync; echo 3 > /proc/sys/vm/drop_caches + echo "<<<<<" + + dd
if=$stored of=$decomp1 bs=65536 &>/dev/null || + error "dd to $decomp1 failed" + + size1=$(stat -c %s $orig) + size2=$(stat -c %s $decomp1) + echo size1: $size1 + echo size2: $size2 + [ $size1 -eq $size2 ] || error "sizes differ" + + blocks1=$(stat -c %b $orig) + blocks2=$(stat -c %b $stored) + echo blocks1: $blocks1 + echo blocks2: $blocks2 + $random_data || [ $blocks2 -lt $blocks1 ] || \ + error "blocks count should be less" + + cmp --verbose $orig $decomp1 || + error "decompression failed" + + # Read 64K starting at the 480K offset, which crosses the + # 512K boundary between the compressed and plain components + dd if=$stored of=$decomp3 \ + bs=32k count=2 skip=15 &>/dev/null || + error "dd to $decomp3 failed" + + dd if=$orig of=$decomp4 \ + bs=32k count=2 skip=15 &>/dev/null || + error "dd to $decomp4 failed" + + cmp --verbose $decomp3 $decomp4 || error "decompression failed" +} + +compress_content() { + t1="lzo" l1=5 c1="64k" compress_type $1 $2 + t1="lzo" l1=5 c1="128k" compress_type $1 $2 + t1="lz4" l1=5 c1="64k" compress_type $1 $2 + t1="lz4hc" l1=5 c1="64k" compress_type $1 $2 + t1="gzip" l1=5 c1="64k" compress_type $1 $2 +} + +enable_compression() { + $LCTL set_param -n llite.*.enable_compression 1 +} + +disable_compression() { + $LCTL set_param -n llite.*.enable_compression 0 +} + +test_460a() { + (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || + skip "Need MDS version at least 2.14.0.85" + + rm -Rf $DIR/$tdir; rm -Rf $TMP/$tdir + + test_mkdir $DIR/$tdir + test_mkdir $TMP/$tdir + + local t1 + local l1 + local c1 + local tmp_hdf=$TMP/$tfile.hdf + local tmp_zero=$TMP/$tfile.zero + local tmp_rand=$TMP/$tfile.rand + local hdf=$LUSTRE/tests/AMSR_E_L3_DailyOcean_V05_20111003.hdf + + if !
[ -e $hdf.bz2 ]; then + echo "HDF file not present" + exit 0 + fi + + if type -p bzcat >/dev/null; then + bzcat $hdf.bz2 > $tmp_hdf + elif type -p bunzip2 >/dev/null; then + cp $hdf.bz2 $tmp_hdf.bz2 || error "cp $tmp_hdf.bz2" + bunzip2 $tmp_hdf.bz2 || error "bunzip2 $tmp_hdf.bz2" + else + skip_env "bunzip2 is not installed" + fi + + stack_trap "rm -Rf $DIR/$tdir; rm -Rf $TMP/$tdir; disable_compression" + + enable_compression + + # test script itself, as a sample text file to compress + compress_content $0 false + + # hdf file - mix of text and binary data + compress_content $tmp_hdf false + + dd if=/dev/zero of=$tmp_zero bs=2M count=2 conv=fsync &>/dev/null || + error "dd to $tmp_zero failed" + + # file with zeros + compress_content $tmp_zero false + + dd if=/dev/urandom of=$tmp_rand bs=2M count=2 conv=fsync &>/dev/null || + error "dd to $tmp_rand failed" + + # file with random data + compress_content $tmp_rand true + + # let's compress bz2 file + compress_content $hdf.bz2 true + + echo "md5sum:" + find $DIR/$tdir -type f -exec md5sum {} + | LC_ALL=C sort + find $TMP/$tdir -type f -exec md5sum {} + | LC_ALL=C sort + + echo "Stored:" + du -h $DIR/$tdir + ls -ls $DIR/$tdir + + echo "Decompressed:" + du -h $TMP/$tdir + ls -ls $TMP/$tdir +} +run_test 460a "Compress/decompress text test" + +test_460b() { + (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || + skip "Need MDS version at least 2.14.0.85" + + local stored=$DIR/$tdir/foofile + test_mkdir $DIR/$tdir + + stack_trap "rm -Rf $DIR/$tdir; disable_compression" + enable_compression + + $LFS setstripe -E 512K -Z gzip:5 -E 768K -Z none -E -1 -Z lz4:5 \ + --compress-chunk=64 $stored || + error "set a compress component in $stored failed" + + #define OBD_FAIL_OSC_WRONG_COMP_ALG 0x418 + # one fail, LL_COMPR_TYPE_FAST used + lctl set_param fail_loc=0x80000418 + dd if=/dev/zero of=$stored bs=64K count=5 conv=fsync || + error "dd to $stored failed" + + # two fails, LL_COMPR_TYPE_LZO used + lctl set_param fail_val=3 
fail_loc=0x10000418 + dd if=/dev/zero of=$stored bs=64K count=5 conv=fsync || + error "dd to $stored failed" + + # three fails, uncompressed, plain + lctl set_param fail_val=4 fail_loc=0x10000418 + dd if=/dev/zero of=$stored bs=64K count=5 conv=fsync || + error "dd to $stored failed" +} +run_test 460b "Try to compress with wrong algo" + test_460d() { + (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || + skip "Need MDS version at least 2.14.0.85" + verify_yaml_available || skip_env "YAML verification not installed" $LCTL get_param -n sptlrpc.page_pools $LCTL get_param -n sptlrpc.page_pools | verify_yaml || diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 482e2b7..17c0c64 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -396,6 +396,43 @@ check_ladvise_hdr(void) } static void +check_ll_compr_hdr(void) +{ + BLANK_LINE(); + CHECK_STRUCT(ll_compr_hdr); + /* bit-field structure member cannot be checked */ + /* + CHECK_MEMBER(ll_compr_hdr, llch_magic:48); + */ + CHECK_MEMBER(ll_compr_hdr, llch_header_size); + CHECK_MEMBER(ll_compr_hdr, llch_exta_flags); + CHECK_MEMBER(ll_compr_hdr, llch_compr_type); + /* bit-field structure member cannot be checked */ + /* + CHECK_MEMBER(ll_compr_hdr, llch_comp_level:4); + CHECK_MEMBER(ll_compr_hdr, llch_flags:4); + */ + CHECK_MEMBER(ll_compr_hdr, llch_chunk_log_bits); + CHECK_MEMBER(ll_compr_hdr, llch_compr_size); + CHECK_MEMBER(ll_compr_hdr, llch_uncompr_csum); + CHECK_MEMBER(ll_compr_hdr, llch_compr_csum); + CHECK_MEMBER(ll_compr_hdr, llch_reserved); + CHECK_MEMBER(ll_compr_hdr, llch_hdr_csum); + + CHECK_VALUE(LLCH_MAGIC); + + CHECK_VALUE(LL_COMPR_TYPE_NONE); + CHECK_VALUE(LL_COMPR_TYPE_FAST); + CHECK_VALUE(LL_COMPR_TYPE_BEST); + CHECK_VALUE(LL_COMPR_TYPE_GZIP); + CHECK_VALUE(LL_COMPR_TYPE_LZ4); + CHECK_VALUE(LL_COMPR_TYPE_LZ4HC); + CHECK_VALUE(LL_COMPR_TYPE_LZO); + + CHECK_VALUE(COMPR_CHUNK_MIN_BITS); +} + +static void check_lustre_handle(void) { BLANK_LINE(); @@ -3226,6 +3263,7 @@ main(int 
argc, char **argv) check_lu_dirpage(); check_lu_ladvise(); check_ladvise_hdr(); + check_ll_compr_hdr(); check_lustre_handle(); check_lustre_msg_v2(); check_ptlrpc_body(); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 10bb43b..8e5c993 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -815,6 +815,64 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LF_UNSET != 0x00000002); BUILD_BUG_ON(LADVISE_MAGIC != 0x1adf1ce0); + /* Checks for struct ll_compr_hdr */ + LASSERTF((int)sizeof(struct ll_compr_hdr) == 32, "found %lld\n", + (long long)(int)sizeof(struct ll_compr_hdr)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_header_size) == 6, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_header_size)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_header_size) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_header_size)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_exta_flags) == 7, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_exta_flags)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_exta_flags) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_exta_flags)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_type) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_compr_type)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_type) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_type)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_chunk_log_bits) == 10, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_chunk_log_bits)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_chunk_log_bits) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_chunk_log_bits)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_size) == 12, "found %lld\n", + (long 
long)(int)offsetof(struct ll_compr_hdr, llch_compr_size)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_size)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_uncompr_csum) == 20, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_uncompr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_uncompr_csum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_uncompr_csum)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_compr_csum) == 24, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_compr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_csum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_compr_csum)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_reserved) == 16, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_reserved)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_reserved) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_reserved)); + LASSERTF((int)offsetof(struct ll_compr_hdr, llch_hdr_csum) == 28, "found %lld\n", + (long long)(int)offsetof(struct ll_compr_hdr, llch_hdr_csum)); + LASSERTF((int)sizeof(((struct ll_compr_hdr *)0)->llch_hdr_csum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_compr_hdr *)0)->llch_hdr_csum)); + LASSERTF(LLCH_MAGIC == 211353433660026, "found %lld\n", + (long long)LLCH_MAGIC); + LASSERTF(LL_COMPR_TYPE_NONE == 0, "found %lld\n", + (long long)LL_COMPR_TYPE_NONE); + LASSERTF(LL_COMPR_TYPE_FAST == 1, "found %lld\n", + (long long)LL_COMPR_TYPE_FAST); + LASSERTF(LL_COMPR_TYPE_BEST == 2, "found %lld\n", + (long long)LL_COMPR_TYPE_BEST); + LASSERTF(LL_COMPR_TYPE_GZIP == 3, "found %lld\n", + (long long)LL_COMPR_TYPE_GZIP); + LASSERTF(LL_COMPR_TYPE_LZ4 == 4, "found %lld\n", + (long long)LL_COMPR_TYPE_LZ4); + 
LASSERTF(LL_COMPR_TYPE_LZ4HC == 5, "found %lld\n", + (long long)LL_COMPR_TYPE_LZ4HC); + LASSERTF(LL_COMPR_TYPE_LZO == 6, "found %lld\n", + (long long)LL_COMPR_TYPE_LZO); + LASSERTF(COMPR_CHUNK_MIN_BITS == 16, "found %lld\n", + (long long)COMPR_CHUNK_MIN_BITS); + /* Checks for struct lustre_handle */ LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n", (long long)(int)sizeof(struct lustre_handle)); -- 1.8.3.1