From 9b20f97fcf3326395a8ff8b04d5d107c3cbd32e9 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Tue, 13 Jun 2023 13:26:09 -0400 Subject: [PATCH] EX-7601 llite: restrict overwrites during preview EX-7601 is an issue where when modifying a compressed file we do not correctly read-up and re-write existing compressed data. To avoid this, we can only allow writes which are not aligned to compression chunk size when they are not overwriting existing data, ie, when they are extending the file. This returns EINVAL for all writes to compressed files which are not either chunk aligned or extending the file. This should prevent users from hitting the data corruption issue but still allows some basic usage. This is intended just for the preview period. Signed-off-by: Patrick Farrell Change-Id: I0eea01e2249866a074afd0d0642fe6dce9a49664 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51259 Tested-by: Andreas Dilger Reviewed-by: Andreas Dilger --- lustre/include/cl_object.h | 18 +++++++++++--- lustre/llite/vvp_io.c | 23 +++++++++++++++++ lustre/lov/lov_object.c | 17 +++++++++++-- lustre/tests/sanity.sh | 62 +++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 108 insertions(+), 12 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 26587ac..abd261b 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -2012,17 +2012,27 @@ struct cl_io { */ ci_parallel_dio:1, /** - * This IO is on a compressed file + * set if IO is on a compressed file */ - ci_compressed_io:1; + ci_compressed_io:1, + /** + * Compression chunk size + */ + ci_compr_chunk_bits:7, /** * Bypass quota check */ - unsigned ci_noquota:1, + ci_noquota:1, /** * io_uring direct IO with flags IOCB_NOWAIT. */ - ci_iocb_nowait:1; + ci_iocb_nowait:1, + /** + * temporary workaround for compression - tells the lower layers the + * write is probably size expanding and doesn't have to be chunk + * aligned + */ + ci_size_extend_compression:1; /** * How many times the read has retried before this one. * Set by the top level and consumed by the LOV. diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 5390768..ca11feb 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1266,6 +1266,13 @@ static int vvp_io_write_start(const struct lu_env *env, size_t nob = io->ci_nob; struct iov_iter iter; size_t written = 0; + int chunk_size; + /* this i_size_read() is potentially racy, but it should be mostly + * reliable and this is just a temporary workaround for the compression + * preview + */ + loff_t inode_size = i_size_read(inode); + bool write_beyond_eof = false; ENTRY; @@ -1293,6 +1300,22 @@ static int vvp_io_write_start(const struct lu_env *env, file_dentry(file)->d_name.name, pos, pos + cnt); + if (pos >= inode_size) + write_beyond_eof = true; + + chunk_size = (1 << (io->ci_compr_chunk_bits + 16)); + + /* compressed files require chunk-aligned writes for overwrites + * writes at EOF are OK because they do not update existing data + * EX-7601 + */ + if (io->ci_compressed_io && !write_beyond_eof && + (pos % chunk_size || cnt % chunk_size)) { + CERROR("Compression preview requires writes to be chunk size aligned or at EOF, write pos: %llu, bytes: %lu, chunk_size: %d\n", + pos, cnt, chunk_size); + RETURN(-EINVAL); + } + /* The maximum Lustre file size is variable, based on the OST maximum * object size and number of stripes. This needs another check in * addition to the VFS checks earlier. */ diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 50dd1c8..459cd10 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -1513,15 +1513,28 @@ static int lov_io_init(const struct lu_env *env, struct cl_object *obj, lov_foreach_layout_entry(lov, lle) { if (lle->lle_lsme->lsme_pattern & LOV_PATTERN_COMPRESS) { io->ci_compressed_io = true; + /* a single IO may hit different components with + * different compression chunk sizes; since this is + * used for chunk-alignment, we can take the largest + * chunk size since chunks are all powers of two and + * aligning to the largest size will also align to any + * smaller sizes + */ + if (lle->lle_lsme->lsme_compr_chunk_log_bits > + io->ci_compr_chunk_bits) { + io->ci_compr_chunk_bits = + lle->lle_lsme->lsme_compr_chunk_log_bits; + } break; } } CDEBUG(D_INODE, - DFID "io %p type %d ignore/verify layout %d/%d, compressed %d\n", + DFID "io %p type %d ignore/verify layout %d/%d, compressed %x, chunk_size %d\n", PFID(lu_object_fid(&obj->co_lu)), io, io->ci_type, io->ci_ignore_layout, io->ci_verify_layout, - io->ci_compressed_io); + io->ci_compressed_io, + io->ci_compressed_io ? (1 << (io->ci_compr_chunk_bits + 16)) : 0); /* IO type CIT_MISC with ci_ignore_layout set are usually invoked from * the OSC layer. It shouldn't take lov layout conf lock in that case, diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index a88546a..43b178c 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -27597,8 +27597,8 @@ disable_compression() { } test_460a() { - (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || - skip "Need MDS version at least 2.14.0.85" + (( MDS1_VERSION >= $(version_code 2.14.0.89) )) || + skip "Need MDS version at least 2.14.0.89" rm -Rf $DIR/$tdir; rm -Rf $TMP/$tdir @@ -27667,8 +27667,8 @@ test_460a() { run_test 460a "Compress/decompress text test" test_460b() { - (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || - skip "Need MDS version at least 2.14.0.85" + (( MDS1_VERSION >= $(version_code 2.14.0.89) )) || + skip "Need MDS version at least 2.14.0.89" local stored=$DIR/$tdir/foofile test_mkdir $DIR/$tdir @@ -27698,9 +27698,59 @@ test_460b() { } run_test 460b "Try to compress with wrong algo" +# This test verifies the write restrictions for the preview version related to +# EX-7601, this test will be removed once EX-7601 is resolved +# Restrictions are as follows: +# Chunk aligned writes are always allowed +# Non-chunk-aligned writes are only allowed if they are extending the file, ie, +# if they are not overwriting existing data +test_460c() { + (( MDS1_VERSION >= $(version_code 2.14.0.89) )) || + skip "Need MDS version at least 2.14.0.89" + + local stored=$DIR/$tdir/foofile + local tmpfile=$TMP/$tdir/footmp + test_mkdir $DIR/$tdir + test_mkdir $TMP/$tdir + + stack_trap "rm -Rf $DIR/$tdir; rm -Rf $TMP/$tdir; disable_compression" + enable_compression + + $LFS setstripe -E 2M -Z gzip:5 -E 10M -Z none -E -1 -Z lz4:5 \ + --compress-chunk=64 $stored || + error "set a compress component in $stored failed" + + # Can do non-chunk-aligned IO when extending the file + dd if=/dev/zero of=$stored bs=37K count=5 || + error "dd to $stored failed" + + sync; echo 3 > /proc/sys/vm/drop_caches + + # Can overwrite an existing file with chunk aligned IO + dd if=/dev/zero of=$stored bs=64K count=5 conv=notrunc || + error "aligned dd to overwrite $stored failed" + + # Test io at 2x chunk size + dd if=/dev/zero of=$stored bs=128K count=5 conv=notrunc || + error "aligned dd to overwrite $stored failed" + + # Cannot do non-chunk aligned IO when overwriting + dd if=/dev/zero of=$stored bs=37K count=5 conv=notrunc && + error "unsafe dd to $stored succeeded" + + # Cannot do non-chunk aligned IO when overwriting (test larger size) + dd if=/dev/zero of=$stored bs=1025K count=5 conv=notrunc && + error "unsafe dd to $stored succeeded" + + # Can do non-chunk aligned IO when extending the file + dd if=/dev/zero of=$stored bs=37K count=5 oflag=append conv=notrunc || + error "unaligned dd to extend $stored failed" +} +run_test 460c "simple verification of preview write restrictions" + test_460d() { - (( MDS1_VERSION >= $(version_code 2.14.0.85) )) || - skip "Need MDS version at least 2.14.0.85" + (( MDS1_VERSION >= $(version_code 2.14.0.89) )) || + skip "Need MDS version at least 2.14.0.89" verify_yaml_available || skip_env "YAML verification not installed" $LCTL get_param -n sptlrpc.page_pools -- 1.8.3.1