From c163e90746ad20c7c382f0d4941a6e1428f21485 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 28 Jul 2024 11:51:48 +0700 Subject: [PATCH] LU-16537 write: fixes to writes at maximal offsets There are issues at maximal offsets. Sanity tests 44[b-e] illustrate the problems. osd_ldiskfs_map_inode_pages(): fix osd-ldiskfs so that it does not hinder writing the maximum possible block of a file, at block number 2^32-1. It was incorrectly preventing this last block from being written. Fixes 44b and 44c. lov_io_rw_iter_init(): cast loff_t to __u64 so that the comparison works correctly for an overflowed loff_t (as it is signed). Fixes 44d which fails for ZFS. lsme_unpack(): Limit the maximal file size to take into account that the last stripe of an object may be incomplete. Fixes 44e. ll_do_tiny_write(): Do not allow writes to go beyond cl_file_maxbytes. Fixes 44e for aarch64 client/LDISKFS servers. HPE-bug-id: LUS-11475 Signed-off-by: Vladimir Saveliev Signed-off-by: Shaun Tancheff Change-Id: I1c19efda38ab3621fa3b08712308a76330d86cff Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49933 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Petros Koutoupis Reviewed-by: Oleg Drokin --- lustre/llite/file.c | 3 + lustre/lov/lov_ea.c | 18 +++++- lustre/lov/lov_io.c | 2 +- lustre/osd-ldiskfs/osd_io.c | 2 +- lustre/tests/ll_sparseness_write.c | 2 +- lustre/tests/sanity.sh | 123 ++++++++++++++++++++++++++++++++++--- lustre/tests/test-framework.sh | 7 +++ 7 files changed, 144 insertions(+), 13 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index a79b561..17bfaa7 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2411,6 +2411,9 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) if (count >= PAGE_SIZE || (iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE) RETURN(0); + /* For aarch64's 64k pages maxbytes is inside of a page. 
*/ + if (iocb->ki_pos + count > ll_file_maxbytes(inode)) + RETURN(-EFBIG); if (unlikely(lock_inode)) ll_inode_lock(inode); diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index eee64d5..30f9b3a 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -330,10 +330,22 @@ retry_new_ost: lov->desc.ld_tgt_count : lsme->lsme_stripe_count; - if (min_stripe_maxbytes <= (LLONG_MAX / stripe_count)) - lov_bytes = min_stripe_maxbytes * stripe_count; - else + if (min_stripe_maxbytes <= LLONG_MAX / stripe_count) { + /* + * If min_stripe_maxbytes is not an even multiple of + * stripe_size, then the last stripe in each object + * cannot be completely filled and would leave a series + * of unwritable holes in the file. + * Trim the maximum file size to the last full stripe + * for each object, plus the maximum object size for + * the 0th stripe. + */ + lov_bytes = (rounddown(min_stripe_maxbytes, + lsme->lsme_stripe_size) * + (stripe_count - 1)) + min_stripe_maxbytes; + } else { lov_bytes = MAX_LFS_FILESIZE; + } out_dom1: *maxbytes = min_t(loff_t, lov_bytes, MAX_LFS_FILESIZE); } diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 1e22a3f..06f02dd 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -1027,7 +1027,7 @@ static int lov_io_rw_iter_init(const struct lu_env *env, "pos %lld, [%lld, %lld)\n", io->u.ci_rw.crw_pos, lse->lsme_extent.e_start, lse->lsme_extent.e_end); next = min_t(__u64, next, lse->lsme_extent.e_end); - next = min_t(loff_t, next, lio->lis_io_endpos); + next = min_t(__u64, next, lio->lis_io_endpos); io->ci_continue = next < lio->lis_io_endpos; io->u.ci_rw.crw_bytes = next - io->u.ci_rw.crw_pos; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 3efe9d9..a802aee 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -959,7 +959,7 @@ static int osd_ldiskfs_map_inode_pages(struct inode *inode, if (++i != pages) continue; } - if (fp->index + clen >= max_page_index) + if (fp->index + clen > 
max_page_index) GOTO(cleanup, rc = -EFBIG); /* process found extent */ map.m_lblk = fp->index * blocks_per_page; diff --git a/lustre/tests/ll_sparseness_write.c b/lustre/tests/ll_sparseness_write.c index aea1f2b..f0b822c 100644 --- a/lustre/tests/ll_sparseness_write.c +++ b/lustre/tests/ll_sparseness_write.c @@ -50,7 +50,7 @@ int main(int argc, char **argv) { int p_size; - unsigned int offset; + loff_t offset; char *filename; int fd; char buf[] = "+++"; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 913fb6e..52d5384 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -5890,6 +5890,122 @@ test_44a() { } run_test 44a "test sparse pwrite ===============================" +test_44b() { + (( $OST1_VERSION >= $(version_code 2.15.61.137) )) || + skip "Need OST >= 2.15.61.137 for large object handling" + + $LFS setstripe -c 1 $DIR/$tfile || error "setstripe failed" + local off=$((2**32*4096-8192)) + dd if=/dev/zero of=$DIR/$tfile bs=1 count=1 seek=$off conv=notrunc || + error "dd failed" + cancel_lru_locks osc + $CHECKSTAT -s $((2**32*4096-8192+1)) $DIR/$tfile || error "wrong size" +} +run_test 44b "write one byte at offset 0xfffffffe000" + +test_44c() { + (( $OST1_VERSION >= $(version_code 2.15.61.137) )) || + skip "Need OST >= 2.15.61.137 for large object handling" + + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + local max_object_bytes=$(import_param $osc_tgt max_object_bytes) + + $LFS setstripe -c 1 $DIR/$tfile || error "setstripe failed" + + dd if=/dev/zero of=$DIR/$tfile conv=notrunc bs=1 count=1 \ + seek=$((max_object_bytes - 1)) || error "dd failed" + cancel_lru_locks osc + $CHECKSTAT -s $max_object_bytes $DIR/$tfile || error "wrong size" +} +run_test 44c "write 1 byte at max_object_bytes - 1 offset" + +test_44d() { + (( $OST1_VERSION >= $(version_code 2.15.61.137) )) || + skip "Need OST >= 2.15.61.137 for large object handling" + + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + local 
max_object_bytes=$(import_param $osc_tgt max_object_bytes) + + $LFS setstripe -c 2 $DIR/$tfile || error "setstripe failed" + + local stripe_size=$($LFS getstripe -S $DIR/$tfile) + local off=$((max_object_bytes & ~(stripe_size - 1))) + + $TRUNCATE $DIR/$tfile $off + dd if=/dev/zero of=$DIR/$tfile bs=1 count=1 seek=$off + local rc1=$? + dd if=/dev/zero of=$DIR/$tfile oflag=append conv=notrunc bs=1 count=1 \ + seek=$off + local rc2=$? + [[ $rc1 -eq 0 && $rc2 -eq 0 ]] || error "one of dd commands failed" +} +run_test 44d "if write at position fails (EFBIG), so should do append" + +# write file until maximal size is reached +max_file_size() { + local file=$1 + + off=1 + minoff=1 + while true; do + echo a | dd of=$1 bs=1 count=1 conv=notrunc seek=$off status=progress \ + 2>/dev/null + [[ $? -ne 0 ]] && break + minoff=$off + off=$(echo "$off * 2" | bc) + done + maxoff=$off + minoff_1=$(echo $off + 1 | bc) + while [[ maxoff -ne minoff_1 ]]; do + off=$(echo "($maxoff + $minoff) / 2" | bc) + echo a | dd of=$1 bs=1 count=1 conv=notrunc seek=$off status=progress \ + 2>/dev/null + [[ $? 
-eq 0 ]] && minoff=$off || maxoff=$off + minoff_1=$(echo $off + 1 | bc) + done + stat -c %s $file +} + +test_44e_write_read() +{ + local ifile=$1 + local ofile=$2 + local stripe_count=$3 + local stripe_size=$($LFS getstripe -S $ofile) + local file_size=$(max_file_size $ofile) + local write_count=$((stripe_count * stripe_size)) + local offset=$((file_size - write_count)) + + dd if=/dev/urandom of=$ifile bs=$write_count count=1 || + error "failed to write random data" + + dd if=$ifile of=$ofile bs=$write_count count=1 oflag=seek_bytes \ + seek=$offset conv=notrunc || error "dd failed" + cancel_lru_locks osc + cmp $ifile $ofile -i 0:$offset -n $write_count || error "cmp failed" +} + +test_44e() { + (( $OST1_VERSION >= $(version_code 2.15.61.137) )) || + skip "Need OST >= 2.15.61.137 for large object handling" + + local TF="$(mktemp --tmpdir -u $tfile.XXXXXX)" + + $LFS setstripe -S 1M -c $OSTCOUNT $DIR/$tfile || + error "lfs setstripe -S 1M -c $OSTCOUNT failed" + test_44e_write_read $TF $DIR/$tfile $OSTCOUNT + rm -f $DIR/$tfile + rm -f $TF + + $LFS setstripe -S 1M -c $OSTCOUNT -C $((OSTCOUNT * 2)) $DIR/$tfile || + error "lfs setstripe -S 1M -c $OSTCOUNT -C [* 2] failed" + test_44e_write_read $TF $DIR/$tfile $((OSTCOUNT * 2)) + + rm -f $DIR/$tfile + rm -f $TF +} +run_test 44e "write and read maximal stripes" + dirty_osc_total() { tot=0 for d in `lctl get_param -n ${OSC}.*.cur_dirty_bytes`; do @@ -10127,13 +10243,6 @@ test_64c() { } run_test 64c "verify grant shrink" -import_param() { - local tgt=$1 - local param=$2 - - $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }" -} - # this does exactly what osc_request.c:osc_announce_cached() does in # order to calculate max amount of grants to ask from server want_grant() { diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index a309253..63fcb2d 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -8887,6 +8887,13 @@ wait_clients_import_ready() { 
wait_clients_import_state "$1" "$2" "\(FULL\|IDLE\)" } +import_param() { + local tgt=$1 + local param=$2 + + $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }" +} + wait_osp_active() { local facet=$1 local tgt_name=$2 -- 1.8.3.1