Whamcloud - gitweb
LU-17110 llite: fix slab corruption with fm_extent_count=0 52/52352/4
authorEtienne AUJAMES <etienne.aujames@cea.fr>
Tue, 12 Sep 2023 16:06:25 +0000 (18:06 +0200)
committerOleg Drokin <green@whamcloud.com>
Thu, 28 Sep 2023 08:01:31 +0000 (08:01 +0000)
If userspace uses fiemap with .fm_extent_count=0, .fm_extents[0] is
not allocated. Writing on the first entry without checking the extent
count could lead to memory corruption (slab).

This patch fix also the case when osc is disable: FIEMAP_EXTENT_LAST
should be set on the extent (fe_flags) and not on the fiemap struct.

Add a regression test sanityn 71d to test fiemap with
fm_extent_count=0.
Add a regression test sanity-hsm 408 to test fiemap on release files.

Fixes: 4097196 ("LU-11848 lov: FIEMAP support for PFL and FLR file")
Test-Parameters:testlist=sanityn
Test-Parameters:testlist=sanityn env=ONLY=71d,ONLY_REPEAT=20
Test-Parameters:testlist=sanity-hsm env=ONLY=408,ONLY_REPEAT=20
Signed-off-by: Etienne AUJAMES <etienne.aujames@cea.fr>
Change-Id: Id63c6973540187e678020977f2d555dfcbf3c634
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52352
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Feng Lei <flei@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/file.c
lustre/lov/lov_object.c
lustre/tests/Makefile.am
lustre/tests/checkfiemap.c
lustre/tests/sanity-hsm.sh
lustre/tests/sanityn.sh

index 1eccf51..5c078c0 100644 (file)
@@ -5785,7 +5785,7 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        rc = ll_do_fiemap(inode, fiemap, num_bytes);
 
-       if (IS_ENCRYPTED(inode)) {
+       if (IS_ENCRYPTED(inode) && extent_count > 0) {
                int i;
 
                for (i = 0; i < fiemap->fm_mapped_extents; i++)
index 40d2cb6..88c15bc 100644 (file)
@@ -1739,7 +1739,7 @@ static u64 fiemap_calc_fm_end_offset(struct fiemap *fiemap,
                                     int *start_stripe)
 {
        struct lov_stripe_md_entry *lsme = lsm->lsm_entries[index];
-       u64 local_end = fiemap->fm_extents[0].fe_logical;
+       u64 local_end;
        u64 lun_end;
        u64 fm_end_offset;
        int stripe_no = -1;
@@ -1748,6 +1748,7 @@ static u64 fiemap_calc_fm_end_offset(struct fiemap *fiemap,
            fiemap->fm_extents[0].fe_logical == 0)
                return 0;
 
+       local_end = fiemap->fm_extents[0].fe_logical;
        stripe_no = *start_stripe;
 
        if (stripe_no == -1)
@@ -1899,12 +1900,15 @@ static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj,
                        GOTO(obj_put, rc = -EINVAL);
                /* If OST is inactive, return extent with UNKNOWN flag. */
                if (!lov->lov_tgts[ost_index]->ltd_active) {
-                       fs->fs_fm->fm_flags |= FIEMAP_EXTENT_LAST;
+
                        fs->fs_fm->fm_mapped_extents = 1;
+                       if (fs->fs_fm->fm_extent_count == 0)
+                               goto inactive_tgt;
 
                        fm_ext[0].fe_logical = obd_start;
                        fm_ext[0].fe_length = obd_end - obd_start + 1;
-                       fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+                       fm_ext[0].fe_flags |=
+                               FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
 
                        goto inactive_tgt;
                }
@@ -2053,6 +2057,9 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
                         * request fits in file-size.
                         */
                        fiemap->fm_mapped_extents = 1;
+                       if (fiemap->fm_extent_count == 0)
+                               GOTO(out_lsm, rc = 0);
+
                        fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
                        if (fiemap->fm_start + fiemap->fm_length <
                            fmkey->lfik_oa.o_size)
@@ -2223,11 +2230,6 @@ finish:
        else
                cur_ext = 0;
 
-       /* done all the processing */
-       if (entry > end_entry ||
-           (fs.fs_enough && fs.fs_finish_stripe && entry == end_entry))
-               fiemap->fm_extents[cur_ext].fe_flags |= FIEMAP_EXTENT_LAST;
-
        /* Indicate that we are returning device offsets unless file just has
         * single stripe */
        if (lsm->lsm_entry_count > 1 ||
@@ -2238,6 +2240,11 @@ finish:
        if (fiemap->fm_extent_count == 0)
                goto skip_last_device_calc;
 
+       /* done all the processing */
+       if (entry > end_entry ||
+           (fs.fs_enough && fs.fs_finish_stripe && entry == end_entry))
+               fiemap->fm_extents[cur_ext].fe_flags |= FIEMAP_EXTENT_LAST;
+
 skip_last_device_calc:
        fiemap->fm_mapped_extents = fs.fs_cur_extent;
 out_fm_local:
index ab8b3f9..ad340b1 100644 (file)
@@ -131,6 +131,7 @@ createmany_LDADD=$(LIBLUSTREAPI)
 create_foreign_dir_LDADD = $(LIBLUSTREAPI)
 check_fallocate_LDADD = $(LIBLUSTREAPI)
 fsx_LDADD = $(LIBLUSTREAPI)
+checkfiemap_LDADD = -lpthread
 if LIBAIO
 aiocp_LDADD= -laio
 endif
index 59284ae..f396bbf 100644 (file)
@@ -38,6 +38,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <pthread.h>
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/lustre/lustre_user.h>
 
 #define ONEMB 1048576
 
+static inline void print_extent_flags(unsigned int flags) {
+       if (!flags)
+               return;
+
+       printf("flags (0x%x):", flags);
+       if (flags & FIEMAP_EXTENT_LAST)
+               printf(" LAST");
+       if (flags & FIEMAP_EXTENT_UNKNOWN)
+               printf(" UNKNOWN");
+       if (flags & FIEMAP_EXTENT_DELALLOC)
+               printf(" DELALLOC");
+       if (flags & FIEMAP_EXTENT_ENCODED)
+               printf(" ENCODED");
+       if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
+               printf(" DATA_ENCRYPTED");
+       if (flags & FIEMAP_EXTENT_NOT_ALIGNED)
+               printf(" NOT_ALIGNED");
+       if (flags & FIEMAP_EXTENT_DATA_INLINE)
+               printf(" DATA_INLINE");
+       if (flags & FIEMAP_EXTENT_DATA_TAIL)
+               printf(" DATA_TAIL");
+       if (flags & FIEMAP_EXTENT_UNWRITTEN)
+               printf(" UNWRITTEN");
+       if (flags & FIEMAP_EXTENT_MERGED)
+               printf(" MERGED");
+       if (flags & FIEMAP_EXTENT_SHARED)
+               printf(" SHARED");
+       if (flags & FIEMAP_EXTENT_NET)
+               printf(" NET");
+       printf("\n");
+}
+
+
 /* This test executes fiemap ioctl and check
  * a) there are no file ranges marked with FIEMAP_EXTENT_UNWRITTEN
  * b) data ranges sizes sum is equal to given in second param */
-static int check_fiemap(int fd, long long orig_size)
+static int check_fiemap(int fd, long long expected_sum,
+                       unsigned int *mapped_extents)
 {
        /* This buffer is enougth for 1MB length file */
        union { struct fiemap f; char c[4096]; } fiemap_buf;
@@ -60,7 +95,7 @@ static int check_fiemap(int fd, long long orig_size)
        unsigned int count = (sizeof(fiemap_buf) - sizeof(*fiemap)) /
                        sizeof(*fm_extents);
        unsigned int i = 0;
-       long long file_size = 0;
+       long long ext_len_sum = 0;
 
        memset(&fiemap_buf, 0, sizeof(fiemap_buf));
 
@@ -75,26 +110,65 @@ static int check_fiemap(int fd, long long orig_size)
        }
 
        for (i = 0; i < fiemap->fm_mapped_extents; i++) {
-               printf("extent in "
-                       "offset %lu, length %lu\n"
-                       "flags: %x\n",
-                       (unsigned long)fm_extents[i].fe_logical,
-                       (unsigned long)fm_extents[i].fe_length,
-                       fm_extents[i].fe_flags);
+               printf("extent %d in offset %lu, length %lu\n",
+                       i, (unsigned long)fm_extents[i].fe_logical,
+                       (unsigned long)fm_extents[i].fe_length);
+
+               print_extent_flags(fm_extents[i].fe_flags);
 
                if (fm_extents[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
                        fprintf(stderr, "Unwritten extent\n");
                        return -2;
                } else {
-                       file_size += fm_extents[i].fe_length;
+                       ext_len_sum += fm_extents[i].fe_length;
                }
        }
 
-       printf("No unwritten extents, extents number %u, "
-               "file size %lli, original size %lli\n",
+       printf("No unwritten extents, extents number %u, sum of lengths %lli, expected sum %lli\n",
                fiemap->fm_mapped_extents,
-               file_size, orig_size);
-       return file_size != orig_size;
+               ext_len_sum, expected_sum);
+
+       *mapped_extents = fiemap->fm_mapped_extents;
+       return ext_len_sum != expected_sum || (expected_sum && !*mapped_extents);
+}
+
+/**
+ * LU-17110
+ * When userspace uses fiemap with fm_extent_count=0, it means that kernelspace
+ * should return only the number of extents. So we should always check
+ * fm_extent_count before accessing to fm_extents array. Otherwise this could
+ * lead to buffer overflow and slab memory corruption.
+ */
+struct th_args {
+       int fd;
+       int iter_nbr;
+       int expected_mapped;
+};
+
+static void *corruption_th(void *args)
+{
+       int i;
+       struct th_args *ta = args;
+       struct fiemap fiemap = {
+               .fm_start = 0,
+               .fm_flags = (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER),
+               .fm_extent_count = 0,
+               .fm_length = FIEMAP_MAX_OFFSET };
+
+       for (i = 0; i < ta->iter_nbr; i++) {
+               if (ioctl(ta->fd, FS_IOC_FIEMAP, &fiemap) < 0) {
+                       fprintf(stderr, "error while ioctl: %s\n",
+                               strerror(errno));
+                       return (void *) (long long) -errno;
+               }
+               if (ta->expected_mapped != fiemap.fm_mapped_extents) {
+                       fprintf(stderr, "mapped extents mismatch: expected=%d, returned=%d\n",
+                               ta->expected_mapped, fiemap.fm_mapped_extents);
+                       return (void *) -EINVAL;
+               }
+       }
+
+       return NULL;
 }
 
 int main(int argc, char **argv)
@@ -102,16 +176,22 @@ int main(int argc, char **argv)
        int c;
        struct option long_opts[] = {
                { .name = "test", .has_arg = no_argument, .val = 't' },
+               { .name = "corruption_test", .has_arg = no_argument, .val = 'c' },
                { .name = NULL }
        };
        int fd;
        int rc;
+       unsigned int mapped_extents = 0;
+       bool corruption_test = false;
 
        optind = 0;
-       while ((c = getopt_long(argc, argv, "t", long_opts, NULL)) != -1) {
+       while ((c = getopt_long(argc, argv, "tc", long_opts, NULL)) != -1) {
                switch (c) {
                case 't':
                        return 0;
+               case 'c':
+                       corruption_test = true;
+                       break;
                default:
                        fprintf(stderr, "error: %s: option '%s' unrecognized\n",
                                argv[0], argv[optind - 1]);
@@ -131,10 +211,34 @@ int main(int argc, char **argv)
                return -1;
        }
 
-       fprintf(stderr, "fd: %i\n", fd);
-
-       rc = check_fiemap(fd, atoll(argv[optind + 1]));
-
+       rc = check_fiemap(fd, atoll(argv[optind + 1]), &mapped_extents);
+       if (rc)
+               goto close;
+
+       if (corruption_test) {
+               pthread_t th[200];
+               int i;
+               void *rval = NULL;
+               struct th_args args = {
+                       .fd = fd,
+                       .expected_mapped = mapped_extents,
+                       .iter_nbr = 500
+               };
+
+               for (i = 0; i < 200; i++) {
+                       rc = pthread_create(&th[i], NULL, corruption_th, &args);
+                       if (rc)
+                               goto close;
+               }
+               for (i = 0; i < 200; i++) {
+                       rc = pthread_join(th[i], &rval);
+                       if (rc || rval) {
+                               rc =  1;
+                               goto close;
+                       }
+               }
+       }
+close:
        if (close(fd) < 0)
                fprintf(stderr, "closing %s, error %i", argv[optind], errno);
 
index fd1b2e0..414649d 100755 (executable)
@@ -5408,6 +5408,40 @@ test_407() {
 }
 run_test 407 "Check for double RESTORE records in llog"
 
+test_408 () { #LU-17110
+       checkfiemap --test ||
+               skip "checkfiemap not runnable: $?"
+
+       local f=$DIR/$tfile
+       local fid
+       local out;
+
+       copytool setup
+
+       # write data this way: hole - data - hole - data
+       dd if=/dev/urandom of=$f bs=64K count=1 conv=fsync
+       [[ "$(facet_fstype ost$(($($LFS getstripe -i $f) + 1)))" != "zfs" ]] ||
+               skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS"
+       dd if=/dev/urandom of=$f bs=64K seek=3 count=1 conv=fsync
+       stat $DIR/$tfile
+       echo "disk usage: $(du -B1 $f)"
+       echo "file size: $(du -b $f)"
+
+       fid=$(path2fid $f)
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+       wait_request_state $fid ARCHIVE SUCCEED
+       $LFS hsm_release $f
+
+       out=$(checkfiemap --corruption_test $f $((4 * 64 * 1024))) ||
+               error "checkfiemap failed"
+
+       echo "$out"
+       grep -q "flags (0x3):" <<< "$out" ||
+               error "the extent flags of a relase file should be: LAST UNKNOWN"
+
+}
+run_test 408 "Verify fiemap on release file"
+
 test_500()
 {
        [ "$MDS1_VERSION" -lt $(version_code 2.6.92) ] &&
index bfc9dd6..27fc369 100755 (executable)
@@ -3904,6 +3904,27 @@ test_71c() {
 }
 run_test 71c "check FIEMAP_EXTENT_LAST flag with different extents number"
 
+test_71d() { #LU-17110
+       checkfiemap --test ||
+               skip "error $?: checkfiemap failed"
+
+       local f=$DIR/$tfile
+
+       # write data this way: hole - data - hole - data
+       dd if=/dev/urandom of=$f bs=64K count=1
+       [[ "$(facet_fstype ost$(($($LFS getstripe -i $f) + 1)))" != "zfs" ]] ||
+               skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS"
+       dd if=/dev/urandom of=$f bs=64K seek=2 count=1
+       dd if=/dev/urandom of=$f bs=64K seek=4 count=1
+       dd if=/dev/urandom of=$f bs=64K seek=6 count=1 conv=fsync
+       echo "disk usage: $(du -B1 $f)"
+       echo "file size: $(du -b $f)"
+
+       checkfiemap --corruption_test $f $((4 * 64 *1024)) ||
+               error "checkfiemap failed"
+}
+run_test 71d "fiemap corruption test with fm_extent_count=0"
+
 test_72() {
        local p="$TMP/sanityN-$TESTNAME.parameters"
        local tlink1