Whamcloud - gitweb
EX-7601 ofd: do not use page cache for compressed files
authorPatrick Farrell <pfarrell@whamcloud.com>
Wed, 6 Dec 2023 15:55:24 +0000 (10:55 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 12 Dec 2023 04:03:46 +0000 (04:03 +0000)
It is challenging for the server to safely use the page
cache with compressed files, because if data is
decompressed in to the page cache, the data in cache now
differs from the data on disk.

This is a problem if *part* of the page cache is ever
evicted, because we can end up with a situation where a read
will be partially satisfied from cache and partially from
disk, but the data on disk is compressed and the data in
cache is not.

It is possible to deal with this by carefully ensuring the
page cache is not used just for decompressed data, but this
makes getting the buffers/lnbs for compressed files fairly
complicated.  Instead, we can just entirely block using the
server page cache for compressed files.

This must be done for both read and write, and only works
for ldiskfs - ZFS cannot easily be forced to not use its
page cache.  But that's OK because we do not support CSDC
with ZFS.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Iee73abb29ad5631bb2203c2133756d7ebf5b686d
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53348
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/dt_object.h
lustre/ofd/ofd_io.c
lustre/osd-ldiskfs/osd_io.c

index 25c3697..00eb7ba 100644 (file)
@@ -1148,6 +1148,7 @@ enum dt_bufs_type {
        DT_BUFS_TYPE_WRITE      = 0x0001,
        DT_BUFS_TYPE_READAHEAD  = 0x0002,
        DT_BUFS_TYPE_LOCAL      = 0x0004,
+       DT_BUFS_TYPE_COMPRESSION        = 0x0008,
 };
 
 /**
index 1ef0723..3c828fd 100644 (file)
@@ -620,6 +620,9 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        if (ptlrpc_connection_is_local(exp->exp_connection))
                dbt |= DT_BUFS_TYPE_LOCAL;
 
+       if (chunk_size)
+               dbt |= DT_BUFS_TYPE_COMPRESSION;
+
        begin = -1;
        end = 0;
 
@@ -822,6 +825,9 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                GOTO(out, rc = -ENOENT);
        }
 
+       if (chunk_size)
+               dbt |= DT_BUFS_TYPE_COMPRESSION;
+
        if (ptlrpc_connection_is_local(exp->exp_connection))
                dbt |= DT_BUFS_TYPE_LOCAL;
 
index 750458a..7c5294f 100644 (file)
@@ -703,7 +703,8 @@ static int osd_init_lnbs(loff_t offset, ssize_t len, int *nrpages,
 }
 
 static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt,
-                                loff_t offset, gfp_t gfp_mask, bool cache)
+                                loff_t offset, gfp_t gfp_mask, bool cache,
+                                bool compression)
 {
        struct osd_thread_info *oti = osd_oti_get(env);
        struct inode *inode = osd_dt_obj(dt)->oo_inode;
@@ -714,6 +715,7 @@ static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt,
        LASSERT(inode);
 
        if (cache) {
+               LASSERT(!compression);
                page = find_or_create_page(inode->i_mapping,
                                           offset >> PAGE_SHIFT, gfp_mask);
 
@@ -733,7 +735,16 @@ static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt,
                page = find_lock_page(inode->i_mapping, offset >> PAGE_SHIFT);
                if (page) {
                        wait_on_page_writeback(page);
-                       return page;
+                       if (!compression)
+                               return page;
+                       /* ldiskfs truncate creates a single page in the page
+                        * cache, so if we're in that case, we must force it
+                        * out (there is no data in it, so this is safe)
+                        */
+                       CDEBUG(D_SEC,
+                              "compression, found page at %llu, removing\n",
+                              offset);
+                       generic_error_remove_page(inode->i_mapping, page);
                }
        }
 
@@ -876,6 +887,13 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
        iosize = fsize - lnb[0].lnb_file_offset;
        fsize = max(fsize, i_size_read(obj->oo_inode));
 
+       if (rw & DT_BUFS_TYPE_COMPRESSION) {
+               CDEBUG(D_INODE,
+                      "Compression - not using page cache for IO at %lld, %ld bytes\n",
+                      pos, len);
+               cache = false;
+               goto bypass_checks;
+       }
        cache = rw & DT_BUFS_TYPE_READAHEAD;
        if (cache)
                goto bypass_checks;
@@ -921,7 +939,8 @@ bypass_checks:
                                             GFP_HIGHUSER;
        for (i = 0; i < npages; i++, lnb++) {
                lnb->lnb_page = osd_get_page(env, dt, lnb->lnb_file_offset,
-                                            gfp_mask, cache);
+                                            gfp_mask, cache,
+                                            rw & DT_BUFS_TYPE_COMPRESSION);
                if (lnb->lnb_page == NULL)
                        GOTO(cleanup, rc = -ENOMEM);