Whamcloud - gitweb
LU-13397 lfs: mirror resync to keep sparseness 73/40773/11
author Mikhail Pershin <mpershin@whamcloud.com>
Wed, 25 Nov 2020 16:05:05 +0000 (19:05 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 6 Apr 2021 03:02:03 +0000 (03:02 +0000)
Use SEEK_HOLE/SEEK_DATA in llapi_mirror_resync_many() to
copy just the data chunks between components. Holes in the last
component are created with truncate(); holes in other components
are created with fallocate(FALLOC_FL_PUNCH_HOLE). If a punch()
fails, the hole is simply copied via read(), i.e. as zeroes.

fallocate(FALLOC_FL_PUNCH_HOLE) is not supported yet,
so resync currently preserves sparseness only for the last components.

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Id249739c5cd2d1c8a998da3341d326de1a8b8d32
Reviewed-on: https://review.whamcloud.com/40773
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre/lustreapi.h
lustre/tests/sanity-flr.sh
lustre/utils/liblustreapi_layout.c
lustre/utils/liblustreapi_lseek.c
lustre/utils/liblustreapi_mirror.c

index 905fba6..2619002 100644 (file)
@@ -610,6 +610,7 @@ int llapi_group_unlock(int fd, int gid);
 
 bool llapi_file_is_sparse(int fd);
 off_t llapi_data_seek(int src_fd, off_t offset, size_t *length);
 
 bool llapi_file_is_sparse(int fd);
 off_t llapi_data_seek(int src_fd, off_t offset, size_t *length);
+int llapi_hole_punch(int fd, off_t start, size_t length);
 
 /* Ladvise */
 int llapi_ladvise(int fd, unsigned long long flags, int num_advise,
 
 /* Ladvise */
 int llapi_ladvise(int fd, unsigned long long flags, int num_advise,
@@ -1157,6 +1158,7 @@ ssize_t llapi_mirror_copy_many(int fd, __u16 src, __u16 *dst, size_t count);
 int llapi_mirror_copy(int fd, unsigned int src, unsigned int dst,
                      off_t pos, size_t count);
 off_t llapi_mirror_data_seek(int fd, unsigned int id, off_t pos, size_t *size);
 int llapi_mirror_copy(int fd, unsigned int src, unsigned int dst,
                      off_t pos, size_t count);
 off_t llapi_mirror_data_seek(int fd, unsigned int id, off_t pos, size_t *size);
+int llapi_mirror_punch(int fd, unsigned int id, off_t start, size_t length);
 
 int llapi_heat_get(int fd, struct lu_heat *heat);
 int llapi_heat_set(int fd, __u64 flags);
 
 int llapi_heat_get(int fd, struct lu_heat *heat);
 int llapi_heat_set(int fd, __u64 flags);
index 841619a..b15e70f 100644 (file)
@@ -2465,6 +2465,117 @@ test_50a() {
 }
 run_test 50a "mirror extend/copy preserves sparseness"
 
 }
 run_test 50a "mirror extend/copy preserves sparseness"
 
+# Check that "lfs mirror resync" keeps a mirrored file sparse and that both
+# mirrors hold identical data afterwards, including the case where the first
+# component of mirror #1 is marked stale,nosync and resynced later.
+test_50b() {
+       $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' ||
+               skip "OST does not support SEEK_HOLE"
+
+       local file=$DIR/$tdir/$tfile
+       local offset
+       local sum_1
+       local sum_2
+       local blocks
+
+       mkdir -p $DIR/$tdir
+
+       echo " ** create mirrored file $file"
+       $LFS mirror create -N -E1M -c1 -S1M -E eof \
+               -N -E2M -S1M -E eof -S2M $file ||
+               error "cannot create mirrored file"
+       echo " ** write data chunk at 1M boundary"
+       dd if=/dev/urandom of=$file bs=1k count=20 seek=1021 ||
+               error "cannot write data at 1M boundary"
+       echo " ** create hole at the file end"
+       $TRUNCATE $file 3700000 || error "truncate fails"
+
+       echo " ** verify sparseness"
+       offset=$(lseek_test -d 1000 $file)
+       echo "    first data offset: $offset"
+       [[ $offset == 1000 ]] &&
+               error "src: data is not expected at offset $offset"
+       offset=$(lseek_test -l 3500000 $file)
+       echo "    hole at the end: $offset"
+       [[ $offset == 3500000 ]] ||
+               error "src: hole is expected at 3500000"
+
+       echo " ** resync mirror #2 to mirror #1"
+       # a failed resync must stop the test here instead of surfacing
+       # later as a confusing checksum or block count mismatch
+       $LFS mirror resync $file || error "mirror resync fails"
+
+       # check llapi_mirror_copy_many correctness
+       sum_1=$($LFS mirror read -N 1 $file | md5sum)
+       sum_2=$($LFS mirror read -N 2 $file | md5sum)
+       [[ $sum_1 == $sum_2 ]] ||
+               error "data mismatch: '$sum_1' vs. '$sum_2'"
+
+       cancel_lru_locks osc
+
+       blocks=$(stat -c%b $file)
+       echo " ** consumed blocks: $blocks"
+       # without full punch() support the first component can be not sparse
+       # but the last one should be, so file should use far fewer blocks
+       (( blocks < 5000 )) ||
+               error "Mirrored file consumes $blocks blocks"
+
+       # stale first component in mirror #1
+       $LFS setstripe --comp-set -I0x10001 --comp-flags=stale,nosync $file
+       echo " ** truncate file down"
+       $TRUNCATE $file 0 || error "truncate fails"
+       echo " ** write data chunk at 2M boundary"
+       dd if=/dev/urandom of=$file bs=1k count=20 seek=2041 conv=notrunc ||
+               error "cannot write data at 2M boundary"
+       echo " ** resync mirror #2 to mirror #1 with nosync 1st component"
+       $LFS mirror resync $file || error "mirror rsync fails"
+       # first component is still stale
+       $LFS getstripe $file | grep 'lcme_flags:.*stale' > /dev/null ||
+               error "$file still has no stale component"
+       echo " ** resync mirror #2 to mirror #1 again"
+       $LFS setstripe --comp-set -I0x10001 --comp-flags=stale,^nosync $file
+       $LFS mirror resync $file || error "mirror rsync fails"
+       $LFS getstripe $file | grep 'lcme_flags:.*stale' > /dev/null &&
+               error "$file still has stale component"
+
+       # check llapi_mirror_copy_many correctness
+       sum_1=$($LFS mirror read -N 1 $file | md5sum)
+       sum_2=$($LFS mirror read -N 2 $file | md5sum)
+       [[ $sum_1 == $sum_2 ]] ||
+               error "data mismatch: '$sum_1' vs. '$sum_2'"
+
+       cancel_lru_locks osc
+
+       blocks=$(stat -c%b $file)
+       echo " ** final consumed blocks: $blocks"
+       # while the first component can lose sparseness, the last one should
+       # not, so whole file should still use far fewer blocks in total
+       (( blocks < 3000 )) ||
+               error "Mirrored file consumes $blocks blocks"
+       rm $file
+}
+run_test 50b "mirror rsync handles sparseness"
+
+# Check that extending a sparse file with a new mirror and resyncing it
+# leaves the file size unchanged.
+test_60a() {
+       $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' ||
+               skip "OST does not support SEEK_HOLE"
+
+       local file=$DIR/$tdir/$tfile
+       local old_size=2147483648 # 2GiB
+       local data_off=134217728 # 128MiB, offset of the only data block
+       local new_size
+
+       mkdir -p $DIR/$tdir
+       dd if=/dev/urandom of=$file bs=4096 count=1 \
+               seek=$((data_off / 4096)) ||
+               error "cannot write data at offset $data_off"
+       $TRUNCATE $file $old_size || error "truncate fails"
+
+       # every step is checked: if extend or resync failed silently the
+       # size would stay untouched and the test would pass for no reason
+       $LFS mirror extend -N -c 1 $file || error "mirror extend fails"
+       dd if=/dev/urandom of=$file bs=4096 count=1 \
+               seek=$((data_off / 4096)) conv=notrunc ||
+               error "cannot rewrite data at offset $data_off"
+       $LFS mirror resync $file || error "mirror resync fails"
+
+       new_size=$(stat --format='%s' $file)
+       if ((new_size != old_size)); then
+               error "new_size ($new_size) is not equal to old_size ($old_size)"
+       fi
+
+       rm $file
+}
+run_test 60a "mirror extend sets correct size on sparse file"
+
 ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
 lock_file=$(mktemp /var/lock/FLR.XXXXXX)
 
 ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
 lock_file=$(mktemp /var/lock/FLR.XXXXXX)
 
index 123a491..82e3c4a 100644 (file)
@@ -2909,6 +2909,8 @@ uint32_t llapi_mirror_find(struct llapi_layout *layout,
                if (rc < 0)
                        return rc;
        }
                if (rc < 0)
                        return rc;
        }
+       if (!mirror_id)
+               return -ENOENT;
 
        return mirror_id;
 }
 
        return mirror_id;
 }
@@ -2917,11 +2919,12 @@ int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
                             struct llapi_resync_comp *comp_array,
                             int comp_size,  uint64_t start, uint64_t end)
 {
                             struct llapi_resync_comp *comp_array,
                             int comp_size,  uint64_t start, uint64_t end)
 {
-       uint64_t count;
        size_t page_size = sysconf(_SC_PAGESIZE);
        const size_t buflen = 4 << 20; /* 4M */
        void *buf;
        uint64_t pos = start;
        size_t page_size = sysconf(_SC_PAGESIZE);
        const size_t buflen = 4 << 20; /* 4M */
        void *buf;
        uint64_t pos = start;
+       uint64_t data_off = pos, data_end = pos;
+       uint32_t src = 0;
        int i;
        int rc;
        int rc2 = 0;
        int i;
        int rc;
        int rc2 = 0;
@@ -2930,31 +2933,107 @@ int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
        if (rc)
                return -rc;
 
        if (rc)
                return -rc;
 
-       if (end == OBD_OBJECT_EOF)
-               count = OBD_OBJECT_EOF;
-       else
-               count = end - start;
-
-       while (count > 0) {
-               uint32_t src;
-               uint64_t mirror_end = 0;
-               uint64_t bytes_left;
+       while (pos < end) {
+               uint64_t mirror_end;
                ssize_t bytes_read;
                size_t to_read;
                size_t to_write;
 
                ssize_t bytes_read;
                size_t to_read;
                size_t to_write;
 
-               src = llapi_mirror_find(layout, pos, end, &mirror_end);
-               if (src == 0)
-                       return -ENOENT;
+               if (pos >= data_end) {
+                       off_t tmp_off;
+                       size_t data_size;
+
+                       if (pos >= mirror_end || !src) {
+                               rc = llapi_mirror_find(layout, pos, end,
+                                                       &mirror_end);
+                               if (rc < 0)
+                                       return rc;
+                               src = rc;
+                               /* restrict mirror end by resync end */
+                               mirror_end = MIN(end, mirror_end);
+                       }
 
 
-               if (mirror_end == OBD_OBJECT_EOF) {
-                       bytes_left = count;
-               } else {
-                       bytes_left = MIN(count, mirror_end - pos);
-                       bytes_left = ((bytes_left - 1) | (page_size - 1)) + 1;
+                       tmp_off = llapi_mirror_data_seek(fd, src, pos,
+                                                        &data_size);
+                       if (tmp_off < 0) {
+                               /* switch to full copy */
+                               to_read = mirror_end - pos;
+                               goto do_read;
+                       }
+                       data_off = tmp_off;
+                       data_end = data_off + data_size;
+
+                       data_off = MIN(data_off, mirror_end);
+                       data_end = MIN(data_end, mirror_end);
+
+                       /* align by page, if there is data block to copy */
+                       if (data_size)
+                               data_off &= ~(page_size - 1);
                }
                }
-               to_read = MIN(buflen, bytes_left);
 
 
+               if (pos < data_off) {
+                       for (i = 0; i < comp_size; i++) {
+                               uint64_t cur_pos;
+                               size_t to_punch;
+                               uint32_t mid = comp_array[i].lrc_mirror_id;
+
+                               /* skip non-overlapped component */
+                               if (pos >= comp_array[i].lrc_end ||
+                                   data_off <= comp_array[i].lrc_start)
+                                       continue;
+
+                               if (pos < comp_array[i].lrc_start)
+                                       cur_pos = comp_array[i].lrc_start;
+                               else
+                                       cur_pos = pos;
+
+                               if (data_off > comp_array[i].lrc_end)
+                                       to_punch = comp_array[i].lrc_end -
+                                                  cur_pos;
+                               else
+                                       to_punch = data_off - cur_pos;
+
+                               if (comp_array[i].lrc_end == OBD_OBJECT_EOF) {
+                                       /* the last component can be truncated
+                                        * safely
+                                        */
+                                       rc = llapi_mirror_truncate(fd, mid,
+                                                                  cur_pos);
+                                       /* hole at the end of file, so just
+                                        * truncate up to set size.
+                                        */
+                                       if (!rc && data_off == data_end)
+                                               rc = llapi_mirror_truncate(fd,
+                                                               mid, data_end);
+                               } else {
+                                       rc = llapi_mirror_punch(fd,
+                                               comp_array[i].lrc_mirror_id,
+                                               cur_pos, to_punch);
+                               }
+                               /* if failed then read failed hole range */
+                               if (rc < 0) {
+                                       rc = 0;
+                                       pos = cur_pos;
+                                       if (pos + to_punch == data_off)
+                                               to_read = data_end - pos;
+                                       else
+                                               to_read = to_punch;
+                                       goto do_read;
+                               }
+                       }
+                       pos = data_off;
+               }
+               if (pos == mirror_end)
+                       continue;
+               to_read = data_end - pos;
+do_read:
+               if (!to_read)
+                       break;
+
+               assert(data_end <= mirror_end);
+
+               to_read = MIN(buflen, to_read);
+               to_read = ((to_read - 1) | (page_size - 1)) + 1;
                bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
                if (bytes_read == 0) {
                        /* end of file */
                bytes_read = llapi_mirror_read(fd, src, buf, to_read, pos);
                if (bytes_read == 0) {
                        /* end of file */
@@ -3012,9 +3091,7 @@ int llapi_mirror_resync_many(int fd, struct llapi_layout *layout,
                        }
                        assert(written == to_write2);
                }
                        }
                        assert(written == to_write2);
                }
-
                pos += bytes_read;
                pos += bytes_read;
-               count -= bytes_read;
        }
 
        free(buf);
        }
 
        free(buf);
index 0b82bfa..cd3f008 100644 (file)
@@ -33,6 +33,9 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#ifndef FALLOC_FL_PUNCH_HOLE
+#include <linux/falloc.h> /* for RHEL7.3 glibc-headers and earlier */
+#endif
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <errno.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <errno.h>
@@ -109,3 +112,23 @@ off_t llapi_data_seek(int src_fd, off_t offset, size_t *length)
        *length = hole_off - data_off;
        return data_off;
 }
        *length = hole_off - data_off;
        return data_off;
 }
+
+/**
+ * Punch hole in a file.
+ *
+ * The byte range [start, start + length) is deallocated with fallocate().
+ * FALLOC_FL_PUNCH_HOLE is only accepted by Linux when it is combined with
+ * FALLOC_FL_KEEP_SIZE, so both flags are passed and the file size is left
+ * unchanged by this call.
+ *
+ * \param fd     file descriptor
+ * \param start  offset to start from
+ * \param length hole length
+ *
+ * \retval 0 on success.
+ * \retval -errno on failure to punch hole
+ */
+int llapi_hole_punch(int fd, off_t start, size_t length)
+{
+       int rc;
+
+       /* PUNCH_HOLE without KEEP_SIZE is always rejected with EOPNOTSUPP */
+       rc = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      start, length);
+       if (rc)
+               rc = -errno;
+       return rc;
+}
index a67629d..55d9132 100644 (file)
@@ -203,6 +203,20 @@ int llapi_mirror_truncate(int fd, unsigned int id, off_t length)
        return rc;
 }
 
        return rc;
 }
 
+/**
+ * Punch a hole in the mirror \a id of the file open on \a fd.
+ *
+ * Calls llapi_mirror_set() for \a id, punches the range with
+ * llapi_hole_punch() and then calls llapi_mirror_clear(), whose result is
+ * deliberately ignored.
+ *
+ * \param fd     file descriptor
+ * \param id     mirror ID passed to llapi_mirror_set()
+ * \param start  offset to start from
+ * \param length hole length
+ *
+ * \retval negative value from llapi_mirror_set() if it fails; nothing is
+ *         punched in that case.
+ * \retval result of llapi_hole_punch() otherwise.
+ */
+int llapi_mirror_punch(int fd, unsigned int id, off_t start, size_t length)
+{
+       int rc = llapi_mirror_set(fd, id);
+
+       if (rc >= 0) {
+               rc = llapi_hole_punch(fd, start, length);
+               (void) llapi_mirror_clear(fd);
+       }
+
+       return rc;
+}
+
 bool llapi_mirror_is_sparse(int fd, unsigned int id)
 {
        bool sparse;
 bool llapi_mirror_is_sparse(int fd, unsigned int id)
 {
        bool sparse;