From 972e4333e4c1ca78b609a77fe9f9ade2db9f72af Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 6 Dec 2023 10:55:24 -0500 Subject: [PATCH] EX-7601 ofd: do not use page cache for compressed files It is challenging for the server to safely use the page cache with compressed files, because if data is decompressed in to the page cache, the data in cache now differs from the data on disk. This is a problem if *part* of the page cache is ever evicted, because we can end up with a situation where a read will be partially satisfied from cache and partially from disk, but the data on disk is compressed and the data in cache is not. It is possible to deal with this by carefully ensuring the page cache is not used just for decompressed data, but this makes getting the buffers/lnbs for compressed files fairly complicated. Instead, we can just entirely block using the server page cache for compressed files. This must be done for both read and write, and only works for ldiskfs - ZFS cannot easily be forced to not use its page cache. But that's OK because we do not support CSDC with ZFS. Signed-off-by: Patrick Farrell Change-Id: Iee73abb29ad5631bb2203c2133756d7ebf5b686d Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53348 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Artem Blagodarenko Reviewed-by: Andreas Dilger --- lustre/include/dt_object.h | 1 + lustre/ofd/ofd_io.c | 6 ++++++ lustre/osd-ldiskfs/osd_io.c | 25 ++++++++++++++++++++++--- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 25c3697..00eb7ba 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1148,6 +1148,7 @@ enum dt_bufs_type { DT_BUFS_TYPE_WRITE = 0x0001, DT_BUFS_TYPE_READAHEAD = 0x0002, DT_BUFS_TYPE_LOCAL = 0x0004, + DT_BUFS_TYPE_COMPRESSION = 0x0008, }; /** diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 1ef0723..3c828fd 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -620,6 +620,9 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, if (ptlrpc_connection_is_local(exp->exp_connection)) dbt |= DT_BUFS_TYPE_LOCAL; + if (chunk_size) + dbt |= DT_BUFS_TYPE_COMPRESSION; + begin = -1; end = 0; @@ -822,6 +825,9 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, GOTO(out, rc = -ENOENT); } + if (chunk_size) + dbt |= DT_BUFS_TYPE_COMPRESSION; + if (ptlrpc_connection_is_local(exp->exp_connection)) dbt |= DT_BUFS_TYPE_LOCAL; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 750458a..7c5294f1 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -703,7 +703,8 @@ static int osd_init_lnbs(loff_t offset, ssize_t len, int *nrpages, } static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt, - loff_t offset, gfp_t gfp_mask, bool cache) + loff_t offset, gfp_t gfp_mask, bool cache, + bool compression) { struct osd_thread_info *oti = osd_oti_get(env); struct inode *inode = osd_dt_obj(dt)->oo_inode; @@ -714,6 +715,7 @@ static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt, LASSERT(inode); if (cache) { + LASSERT(!compression); page = find_or_create_page(inode->i_mapping, offset >> PAGE_SHIFT, gfp_mask); @@ -733,7 +735,16 @@ static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt, page = find_lock_page(inode->i_mapping, offset >> PAGE_SHIFT); if (page) { wait_on_page_writeback(page); - return page; + if (!compression) + return page; + /* ldiskfs truncate creates a single page in the page + * cache, so if we're in that case, we must force it + * out (there is no data in it, so this is safe) + */ + CDEBUG(D_SEC, + "compression, found page at %llu, removing\n", + offset); + generic_error_remove_page(inode->i_mapping, page); } } @@ -876,6 +887,13 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, iosize = fsize - lnb[0].lnb_file_offset; fsize = max(fsize, i_size_read(obj->oo_inode)); + if (rw & DT_BUFS_TYPE_COMPRESSION) { + CDEBUG(D_INODE, + "Compression - not using page cache for IO at %lld, %ld bytes\n", + pos, len); + cache = false; + goto bypass_checks; + } cache = rw & DT_BUFS_TYPE_READAHEAD; if (cache) goto bypass_checks; @@ -921,7 +939,8 @@ bypass_checks: GFP_HIGHUSER; for (i = 0; i < npages; i++, lnb++) { lnb->lnb_page = osd_get_page(env, dt, lnb->lnb_file_offset, - gfp_mask, cache); + gfp_mask, cache, + rw & DT_BUFS_TYPE_COMPRESSION); if (lnb->lnb_page == NULL) GOTO(cleanup, rc = -ENOMEM); -- 1.8.3.1