Whamcloud - gitweb
LU-16537 write: fixes to writes at maximal offsets 33/49933/19
author Shaun Tancheff <shaun.tancheff@hpe.com>
Sun, 28 Jul 2024 04:51:48 +0000 (11:51 +0700)
committer Oleg Drokin <green@whamcloud.com>
Fri, 16 Aug 2024 23:48:21 +0000 (23:48 +0000)
Writes at maximal file offsets have several issues.
Sanity tests 44[b-e] illustrate the problems.

osd_ldiskfs_map_inode_pages():
  Fix osd-ldiskfs so that it no longer rejects a write to the last
  possible block of a file, block number 2^32-1.  The old check
  incorrectly prevented this last block from being written.
        Fixes 44b and 44c.

lov_io_rw_iter_init():
  Compare as __u64 instead of loff_t so that the comparison works
  correctly when the signed loff_t value has overflowed.
        Fixes 44d which fails for ZFS.

lsme_unpack():
  Limit the maximal file size to take into account that the last
  stripe of an object may be incomplete.
        Fixes 44e.

ll_do_tiny_write():
  Do not allow a write to go beyond cl_file_maxbytes.
        Fixes 44e for aarch64 client/LDISKFS servers.

HPE-bug-id: LUS-11475
Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Change-Id: I1c19efda38ab3621fa3b08712308a76330d86cff
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49933
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Petros Koutoupis <petros.koutoupis@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/file.c
lustre/lov/lov_ea.c
lustre/lov/lov_io.c
lustre/osd-ldiskfs/osd_io.c
lustre/tests/ll_sparseness_write.c
lustre/tests/sanity.sh
lustre/tests/test-framework.sh

index a79b561..17bfaa7 100644 (file)
@@ -2411,6 +2411,9 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
        if (count >= PAGE_SIZE ||
            (iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
                RETURN(0);
+       /* For aarch64's 64k pages maxbytes is inside of a page. */
+       if (iocb->ki_pos + count > ll_file_maxbytes(inode))
+               RETURN(-EFBIG);
 
        if (unlikely(lock_inode))
                ll_inode_lock(inode);
index eee64d5..30f9b3a 100644 (file)
@@ -330,10 +330,22 @@ retry_new_ost:
                                            lov->desc.ld_tgt_count :
                                            lsme->lsme_stripe_count;
 
-               if (min_stripe_maxbytes <= (LLONG_MAX / stripe_count))
-                       lov_bytes = min_stripe_maxbytes * stripe_count;
-               else
+               if (min_stripe_maxbytes <= LLONG_MAX / stripe_count) {
+                       /*
+                        * If min_stripe_maxbytes is not an even multiple of
+                        * stripe_size, then the last stripe in each object
+                        * cannot be completely filled and would leave a series
+                        * of unwritable holes in the file.
+                        * Trim the maximum file size to the last full stripe
+                        * for each object, plus the maximum object size for
+                        * the 0th stripe.
+                        */
+                       lov_bytes = (rounddown(min_stripe_maxbytes,
+                                             lsme->lsme_stripe_size) *
+                                   (stripe_count - 1)) + min_stripe_maxbytes;
+               } else {
                        lov_bytes = MAX_LFS_FILESIZE;
+               }
 out_dom1:
                *maxbytes = min_t(loff_t, lov_bytes, MAX_LFS_FILESIZE);
        }
index 1e22a3f..06f02dd 100644 (file)
@@ -1027,7 +1027,7 @@ static int lov_io_rw_iter_init(const struct lu_env *env,
                 "pos %lld, [%lld, %lld)\n", io->u.ci_rw.crw_pos,
                 lse->lsme_extent.e_start, lse->lsme_extent.e_end);
        next = min_t(__u64, next, lse->lsme_extent.e_end);
-       next = min_t(loff_t, next, lio->lis_io_endpos);
+       next = min_t(__u64, next, lio->lis_io_endpos);
 
        io->ci_continue = next < lio->lis_io_endpos;
        io->u.ci_rw.crw_bytes = next - io->u.ci_rw.crw_pos;
index 3efe9d9..a802aee 100644 (file)
@@ -959,7 +959,7 @@ static int osd_ldiskfs_map_inode_pages(struct inode *inode,
                        if (++i != pages)
                                continue;
                }
-               if (fp->index + clen >= max_page_index)
+               if (fp->index + clen > max_page_index)
                        GOTO(cleanup, rc = -EFBIG);
                /* process found extent */
                map.m_lblk = fp->index * blocks_per_page;
index aea1f2b..f0b822c 100644 (file)
@@ -50,7 +50,7 @@
 int main(int argc, char **argv)
 {
        int p_size;
-       unsigned int offset;
+       loff_t offset;
        char *filename;
        int fd;
        char buf[] = "+++";
index 913fb6e..52d5384 100755 (executable)
@@ -5890,6 +5890,122 @@ test_44a() {
 }
 run_test 44a "test sparse pwrite ==============================="
 
+test_44b() {
+       (( $OST1_VERSION >= $(version_code 2.15.61.137) )) ||
+               skip "Need OST >= 2.15.61.137 for large object handling"
+
+       $LFS setstripe -c 1 $DIR/$tfile || error "setstripe failed"
+       local off=$((2**32*4096-8192))
+       dd if=/dev/zero of=$DIR/$tfile bs=1 count=1 seek=$off conv=notrunc ||
+               error "dd failed"
+       cancel_lru_locks osc
+       $CHECKSTAT -s $((2**32*4096-8192+1)) $DIR/$tfile || error "wrong size"
+}
+run_test 44b "write one byte at offset 0xfffffffe000"
+
+test_44c() {
+       (( $OST1_VERSION >= $(version_code 2.15.61.137) )) ||
+               skip "Need OST >= 2.15.61.137 for large object handling"
+
+       local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
+       local max_object_bytes=$(import_param $osc_tgt max_object_bytes)
+
+       $LFS setstripe -c 1 $DIR/$tfile || error "setstripe failed"
+
+       dd if=/dev/zero of=$DIR/$tfile conv=notrunc bs=1 count=1 \
+               seek=$((max_object_bytes - 1)) || error "dd failed"
+       cancel_lru_locks osc
+       $CHECKSTAT -s $max_object_bytes $DIR/$tfile || error "wrong size"
+}
+run_test 44c "write 1 byte at max_object_bytes - 1 offset"
+
+test_44d() {
+       (( $OST1_VERSION >= $(version_code 2.15.61.137) )) ||
+               skip "Need OST >= 2.15.61.137 for large object handling"
+
+       local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)"
+       local max_object_bytes=$(import_param $osc_tgt max_object_bytes)
+
+       $LFS setstripe -c 2 $DIR/$tfile || error "setstripe failed"
+
+       local stripe_size=$($LFS getstripe -S $DIR/$tfile)
+       local off=$((max_object_bytes & ~(stripe_size - 1)))
+
+       $TRUNCATE $DIR/$tfile $off
+       dd if=/dev/zero of=$DIR/$tfile bs=1 count=1 seek=$off
+       local rc1=$?
+       dd if=/dev/zero of=$DIR/$tfile oflag=append conv=notrunc bs=1 count=1 \
+               seek=$off
+       local rc2=$?
+       [[ $rc1 -eq 0 && $rc2 -eq 0 ]] || error "one of dd commands failed"
+}
+run_test 44d "if write at position fails (EFBIG), so should do append"
+
+# write file until maximal size is reached
+max_file_size() {
+       local file=$1
+
+       off=1
+       minoff=1
+       while true; do
+               echo a | dd of=$1 bs=1 count=1 conv=notrunc seek=$off status=progress \
+                       2>/dev/null
+               [[ $? -ne 0 ]] && break
+               minoff=$off
+               off=$(echo "$off * 2" | bc)
+       done
+       maxoff=$off
+       minoff_1=$(echo $off + 1 | bc)
+       while [[ maxoff -ne minoff_1 ]]; do
+               off=$(echo "($maxoff + $minoff) / 2" | bc)
+               echo a | dd of=$1 bs=1 count=1 conv=notrunc seek=$off status=progress \
+                       2>/dev/null
+               [[ $? -eq 0 ]] && minoff=$off || maxoff=$off
+               minoff_1=$(echo $off + 1 | bc)
+       done
+       stat -c %s $file
+}
+
+test_44e_write_read()
+{
+       local ifile=$1
+       local ofile=$2
+       local stripe_count=$3
+       local stripe_size=$($LFS getstripe -S $ofile)
+       local file_size=$(max_file_size $ofile)
+       local write_count=$((stripe_count * stripe_size))
+       local offset=$((file_size - write_count))
+
+       dd if=/dev/urandom of=$ifile bs=$write_count count=1 ||
+               error "failed to write random data"
+
+       dd if=$ifile of=$ofile bs=$write_count count=1 oflag=seek_bytes \
+               seek=$offset conv=notrunc || error "dd failed"
+       cancel_lru_locks osc
+       cmp $ifile $ofile -i 0:$offset -n $write_count || error "cmp failed"
+}
+
+test_44e() {
+       (( $OST1_VERSION >= $(version_code 2.15.61.137) )) ||
+                   skip "Need OST >= 2.15.61.137 for large object handling"
+
+       local TF="$(mktemp --tmpdir -u $tfile.XXXXXX)"
+
+       $LFS setstripe -S 1M -c $OSTCOUNT $DIR/$tfile ||
+               error "lfs setstripe -S 1M -c $OSTCOUNT failed"
+       test_44e_write_read $TF $DIR/$tfile $OSTCOUNT
+       rm -f $DIR/$tfile
+       rm -f $TF
+
+       $LFS setstripe -S 1M -c $OSTCOUNT -C $((OSTCOUNT * 2)) $DIR/$tfile ||
+               error "lfs setstripe -S 1M -c $OSTCOUNT -C [* 2] failed"
+       test_44e_write_read $TF $DIR/$tfile $((OSTCOUNT * 2))
+
+       rm -f $DIR/$tfile
+       rm -f $TF
+}
+run_test 44e "write and read maximal stripes"
+
 dirty_osc_total() {
        tot=0
        for d in `lctl get_param -n ${OSC}.*.cur_dirty_bytes`; do
@@ -10127,13 +10243,6 @@ test_64c() {
 }
 run_test 64c "verify grant shrink"
 
-import_param() {
-       local tgt=$1
-       local param=$2
-
-       $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }"
-}
-
 # this does exactly what osc_request.c:osc_announce_cached() does in
 # order to calculate max amount of grants to ask from server
 want_grant() {
index a309253..63fcb2d 100755 (executable)
@@ -8887,6 +8887,13 @@ wait_clients_import_ready() {
        wait_clients_import_state "$1" "$2" "\(FULL\|IDLE\)"
 }
 
+import_param() {
+       local tgt=$1
+       local param=$2
+
+       $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }"
+}
+
 wait_osp_active() {
        local facet=$1
        local tgt_name=$2