From 998e27706e1fac00b5419a3b30aa616288585e06 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 22 May 2024 22:44:49 -0400 Subject: [PATCH] LU-17866 pcc: zero ra_pages explicitly for a file after PCC mmap To support mmap under PCC, we do some special magic with mmap to allow Lustre and PCC to share the page mapping. The mapping host (@mapping->host) for the Lustre file is replaced with the PCC copy for mmap. This may result in the wrong setting of @ra_pages for the Lustre file handle with the backing store of the PCC copy in the kernel: ->do_dentry_open()->file_ra_state_init(): file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages; ra->prev_pos = -1; } Setting readahead pages for a file handle is the last step of the open() call and it is not under the control inside the Lustre file system. Thus, to avoid setting @ra_pages wrongly we set @ra_pages to zero for the Lustre file handle explicitly in all read I/O paths. When invalidating a PCC copy, we will switch back the mapping between Lustre and PCC. We also set mapping->a_ops back with @ll_aops. The readahead path in PCC backend may enter the ->readpage() in Lustre. Then we check whether the file handle is a Lustre file handle. If not, the request must come from the mmap readahead I/O path of the PCC copy, and we return an error code directly in this case. 
Change-Id: Id1e4a9e47bb484e97053759e1743fd2fce040149 Test-Parameters: clientdistro=el8.9 testlist=sanity-pcc env=ONLY=97,ONLY_REPEAT=10 Test-Parameters: clientdistro=el9.3 testlist=sanity-pcc env=ONLY=98,ONLY_REPEAT=10 Signed-off-by: Qian Yingjin Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55181 Tested-by: jenkins Tested-by: Andreas Dilger Reviewed-by: Andreas Dilger --- lustre/llite/pcc.c | 31 +++++++++++++++++++++++-------- lustre/llite/rw.c | 19 +++++++++++++++++-- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index 3cf2443..9ad9710 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1948,6 +1948,7 @@ static int pcc_readonly_attach_thread(void *arg) if (IS_ERR_OR_NULL(file)) GOTO(out, rc = file == NULL ? -EINVAL : PTR_ERR(file)); + file->f_ra.ra_pages = 0; rc = pcc_readonly_attach(file, pccx->pccx_inode, pccx->pccx_attach_id, false); pcc_readonly_attach_fini(pccx->pccx_inode); @@ -2279,6 +2280,17 @@ static inline void pcc_inode_mapping_reset(struct inode *inode) truncate_pagecache_range(inode, 0, LUSTRE_EOF); mapping->a_ops = &ll_aops; + /* + * Please note the mapping host (@mapping->host) for the Lustre file is + * replaced with the PCC copy in case of mmap() on the PCC cached file. + * This may result in the setting of @ra_pages of the Lustre file + * handle with the one of the PCC copy wrongly in the kernel: + * ->do_dentry_open()->file_ra_state_init() + * And this is the last step of the open() call and is not under the + * control inside the Lustre file system. + * Thus to avoid the setting of @ra_pages wrongly we set @ra_pages with + * zero explicitly in all read I/O paths. 
+ */ mapping->host = inode; pcc_inode->i_mapping = &pcc_inode->i_data; @@ -2611,6 +2623,7 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb, ENTRY; + file->f_ra.ra_pages = 0; if (pccf->pccf_file == NULL) { *cached = false; RETURN(0); @@ -2927,6 +2940,7 @@ ssize_t pcc_file_splice_read(struct file *in_file, loff_t *ppos, ENTRY; + in_file->f_ra.ra_pages = 0; if (!pcc_file) RETURN(default_file_splice_read(in_file, ppos, pipe, count, flags)); @@ -3197,18 +3211,19 @@ int pcc_file_mmap(struct file *file, struct vm_area_struct *vma, ENTRY; + /* With PCC, the files are cached in an unusual way, then we do some + * special magic with mmap to allow Lustre and PCC to share the page + * mapping, and the @ra_pages may set with the backing device of PCC + * wrongly in this case. So we must manually set the @ra_pages with + * zero, otherwise it may result in kernel readahead occurring (which + * Lustre does not support). + */ + file->f_ra.ra_pages = 0; + *cached = false; if (!pcc_file || !file_inode(pcc_file)->i_fop->mmap) RETURN(0); - /* with PCC, the files are created in an unusual way, then we do some - * special magic with mmap to allow Lustre and PCC to share the mmap, - * so we must manually set the ra_pages or we can get kernel readahead - * occurring (which Lustre does not support) - */ - pcc_file->f_ra.ra_pages = 0; - file->f_ra.ra_pages = 0; - pcc_inode_lock(inode); pcci = ll_i2pcci(inode); if (pcci && pcc_inode_has_layout(pcci)) { diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 2d55b8f..77a3040 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -1937,6 +1937,20 @@ int ll_readpage(struct file *file, struct page *vmpage) } /* + * This is not a Lustre file handle, and should be a file handle of the + * PCC copy. It is from PCC mmap readahead I/O path and the PCC copy + * was invalidated. + * Here return error code directly as it is from readahead I/O path for + * the PCC copy. 
+ */ + if (inode->i_op != &ll_file_inode_operations) { + CERROR("%s: readpage() on invalidated PCC inode %lu: rc=%d\n", + sb->s_id, inode->i_ino, -EIO); + unlock_page(vmpage); + RETURN(-EIO); + } + + /* * The @vmpage got truncated. * This is a kernel bug introduced in commit v5.11-10234-gcbd59c48ae * ("mm/filemap: use head pages in generic_file_buffered_read") @@ -2097,9 +2111,10 @@ int ll_readpage(struct file *file, struct page *vmpage) ra_assert = true; } #endif - /* See EX-8421 + /* See EX-8421 */ if (ra_assert) - LBUG();*/ + LASSERT(!ra_assert); + /** * Direct read can fall back to buffered read, but DIO is done * with lockless i/o, and buffered requires LDLM locking, so in -- 1.8.3.1