From b887194639dedec394bc2cbd6360228469dac278 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Thu, 30 Nov 2023 14:59:07 -0500 Subject: [PATCH] EX-7601 ofd: clear pages in decompression Handling writes to compressed files requires a read-modify-write cycle, which has implications for how we handle reads. Consider the case of a file with an 8 KiB write at offset 0, which is compressed to 4 KiB. Then there is another 4 KiB write at offset 16 KiB. Updating this correctly requires reading the first chunk, then decompressing it. However, this read will go past EOF, because the write has not occurred yet. The OSD read code does not fill in these pages, because read past EOF is not returned to the client (client gets a short read and does not actually use the pages). In our case, however, we must use these pages (from 8 KiB 16 KiB). In the naive version without recompression, we simply write out 0 - 16 KiB, so we must have zeroes in those pages, and once we have recompression, we must compress those pages so we need zeroes in that case too. So we note if a page has data in it after decompression, then if it does not, we clear the page. Note we do NOT set lnb_rc to 0 when we clear a page, because lnb_rc = 0 is used to indicate EOF rather than a gap in the file. Signed-off-by: Patrick Farrell Change-Id: If1d1360185eb087e821167a08e49c9427e29ffc4 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53302 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- lustre/include/obd.h | 3 ++- lustre/obdclass/lustre_compr.c | 14 +++++++++++++- lustre/ofd/ofd_compress.c | 18 ++++++++++++++++-- lustre/osc/osc_compress.c | 3 ++- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 0d80616..fa616fe 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -479,7 +479,8 @@ void chunk_round(__u64 *buf_start, __u64 *buf_end, int chunk_size); int merge_chunk(struct brw_page **pga, struct niobuf_local *lnb, int first, int count, char *merged, unsigned int *size); void unmerge_chunk(struct brw_page **pga, struct niobuf_local *lnb, int first, - int count, char *merged, unsigned int size); + int count, char *merged, unsigned int size, + int pages_per_chunk); struct tgt_thread_big_cache { /* these work out to MAX_BRW_SIZE / PAGE_SIZE * PTR_SIZE, so for diff --git a/lustre/obdclass/lustre_compr.c b/lustre/obdclass/lustre_compr.c index 29a89e0..b922e48 100644 --- a/lustre/obdclass/lustre_compr.c +++ b/lustre/obdclass/lustre_compr.c @@ -399,7 +399,8 @@ int merge_chunk(struct brw_page **pga, struct niobuf_local *lnbs, EXPORT_SYMBOL(merge_chunk); void unmerge_chunk(struct brw_page **pga, struct niobuf_local *lnb, int first, - int count, char *merged, unsigned int size) + int count, char *merged, unsigned int size, + int pages_per_chunk) { struct brw_page *brwpg; struct page *vmpage; @@ -434,9 +435,20 @@ void unmerge_chunk(struct brw_page **pga, struct niobuf_local *lnb, int first, *pg_len = PAGE_SIZE; left -= PAGE_SIZE; } + /* we just put data in this page, so set the rc */ + if (lnb) + lnb[first + i].lnb_rc = *pg_len; CDEBUG(D_SEC, "pg_len: %u, left %u\n", *pg_len, left); } LASSERT(left == 0); + if (lnb) { + for (; first + i < pages_per_chunk; i++) { + CDEBUG(D_SEC, "no data in page %d at %llu, lnb_rc %d - clear page\n", + i, lnb[first + i].lnb_file_offset, lnb[first + i].lnb_rc); + memset(kmap(lnb[i].lnb_page), 0, PAGE_SIZE); + kunmap(lnb[i].lnb_page); + } + } } EXPORT_SYMBOL(unmerge_chunk); diff --git a/lustre/ofd/ofd_compress.c b/lustre/ofd/ofd_compress.c index 5d84696..24a454c 100644 --- a/lustre/ofd/ofd_compress.c +++ b/lustre/ofd/ofd_compress.c @@ -47,6 +47,7 @@ static int decompress_chunk_in_lnb(const char *obd_name, unsigned int src_size; int hdr_size; int rc = 0; + int i; ENTRY; @@ -56,7 +57,7 @@ static int decompress_chunk_in_lnb(const char *obd_name, /* if this chunk isn't compressed, don't uncompress it */ if (!is_chunk_start(lnbs[lnb_start].lnb_page, &llch)) - RETURN(0); + GOTO(out, rc = 0); /* compression type and level in the compressed data can * be different from those set in the layout, because the client @@ -123,9 +124,22 @@ static int decompress_chunk_in_lnb(const char *obd_name, */ unmerge_chunk(NULL, lnbs, lnb_start, ((dst_size - 1) >> PAGE_SHIFT) + 1, - (char *) bounce_dst, dst_size); + (char *) bounce_dst, dst_size, pages_in_chunk); out: + /* even if we don't successfully uncompress, we may have read pages + * which were beyond EOF, so we need to clear them in case we're going + * to write them out + */ + for(i = lnb_start; i < lnb_start + pages_in_chunk; i++) { + /* if there's no data in this page, we must clear it */ + if (lnbs[i].lnb_rc == 0) { + CDEBUG(D_SEC, "no data in page %d at %llu, clearing\n", + i, lnbs[i].lnb_file_offset); + memset(kmap(lnbs[i].lnb_page), 0, PAGE_SIZE); + kunmap(lnbs[i].lnb_page); + } + } if (cc) crypto_free_comp(cc); RETURN(rc); diff --git a/lustre/osc/osc_compress.c b/lustre/osc/osc_compress.c index 26765be..79c557f 100644 --- a/lustre/osc/osc_compress.c +++ b/lustre/osc/osc_compress.c @@ -439,7 +439,8 @@ int decompress_request(struct osc_brw_async_args *aa, int page_count) CDEBUG(D_SEC, "Decompressed size %u, pages %d\n", dst_size, decompressed_pages); - unmerge_chunk(pga, NULL, i, decompressed_pages, dst, dst_size); + unmerge_chunk(pga, NULL, i, decompressed_pages, dst, dst_size, + 0); /* start of the next chunk is at least compressed pages away*/ next_chunk_min = i + compressed_pages - 1; -- 1.8.3.1