From bba59b1287c9cd8c30a85fafb4fd5788452bd05c Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Tue, 21 Mar 2023 04:53:00 -0400 Subject: [PATCH] LU-16651 llite: hold invalidate_lock when invalidate cache pages The newer kernel (such as Ubuntu 2204) introduces a new member: invalidate_lock in the structure @address_space. The filesystem must exclusively acquire invalidate_lock before invalidating page cache in truncate / hole punch (and thus calling into ->invalidatepage) to block races between page cache invalidation and page cache filling functions (fault, read, ...) However, current Lustre client does not hold this lock when remove pages from page cache caused by the revocation of the extent DLM lock protecting them. If a client has two overlapped PR DLM extent locks, i.e: - L1 = Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50371 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Shaun Tancheff Reviewed-by: Oleg Drokin Reviewed-by: Patrick Farrell --- lustre/autoconf/lustre-core.m4 | 24 +++++++++++++++++++++ lustre/include/cl_object.h | 11 +++++++++- lustre/llite/vvp_io.c | 16 ++++++++++++-- lustre/osc/osc_cache.c | 1 + lustre/tests/sanity.sh | 48 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 3 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 31f123b..f1f8fb7 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -3193,6 +3193,29 @@ AC_DEFUN([LC_HAVE_GET_ACL_RCU_ARG], [ ]) # LC_HAVE_GET_ACL_RCU_ARG # +# LC_HAVE_INVALIDATE_LOCK +# +# Kernel version v5.15-rc1 commit 730633f0b7f951726e87f912a6323641f674ae34 +# mm: Protect operations adding pages to page cache with invalidate_lock +# +AC_DEFUN([LC_HAVE_INVALIDATE_LOCK], [ +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" +LB_CHECK_COMPILE([if have address_space have invalidate_lock member], +address_space_invalidate_lock, [ + #include + ],[ + struct address_space *mapping = NULL; + + filemap_invalidate_lock(mapping); + ],[ + AC_DEFINE(HAVE_INVALIDATE_LOCK, 1, + [address_space invalidate_lock member exists]) + ]) +EXTRA_KCFLAGS="$tmp_flags" +]) # LC_HAVE_INVALIDATE_LOCK + +# # LC_HAVE_SECURITY_DENTRY_INIT_WITH_XATTR_NAME_ARG # # Linux v5.15-rc1-20-g15bf32398ad4 @@ -4258,6 +4281,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [ # 5.15 LC_HAVE_GET_ACL_RCU_ARG + LC_HAVE_INVALIDATE_LOCK # 5.16 LC_HAVE_SECURITY_DENTRY_INIT_WITH_XATTR_NAME_ARG diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index cad4702..3c4534e 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1925,7 +1925,16 @@ struct cl_io { /** * io_uring direct IO with flags IOCB_NOWAIT. */ - ci_iocb_nowait:1; + ci_iocb_nowait:1, + /** + * The filesystem must exclusively acquire invalidate_lock before + * invalidating page cache in truncate / hole punch / DLM extent + * lock blocking AST path (and thus calling into ->invalidatepage) + * to block races between page cache invalidation and page cache + * filling functions (fault, read, ...) + */ + ci_invalidate_page_cache:1; + /** * How many times the read has retried before this one. * Set by the top level and consumed by the LOV. diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index edc821d..a43bf5e 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -303,11 +303,18 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID" ignore/verify layout %d/%d, layout version %d " - "need write layout %d, restore needed %d\n", + "need write layout %d, restore needed %d, invalidate_lock %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, vio->vui_layout_gen, io->ci_need_write_intent, - io->ci_restore_needed); + io->ci_restore_needed, io->ci_invalidate_page_cache); + +#ifdef HAVE_INVALIDATE_LOCK + if (io->ci_invalidate_page_cache) { + filemap_invalidate_unlock(inode->i_mapping); + io->ci_invalidate_page_cache = 0; + } +#endif /* HAVE_INVALIDATE_LOCK */ if (io->ci_restore_needed) { /* file was detected release, we need to restore it @@ -1872,6 +1879,11 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, PFID(lu_object_fid(&obj->co_lu)), result); } +#ifdef HAVE_INVALIDATE_LOCK + if (io->ci_invalidate_page_cache) + filemap_invalidate_lock(inode->i_mapping); +#endif /* HAVE_INVALIDATE_LOCK */ + io->ci_result = result < 0 ? result : 0; RETURN(result); } diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 65ad501..746f028 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -3265,6 +3265,7 @@ int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, io->ci_obj = cl_object_top(osc2cl(osc)); io->ci_ignore_layout = 1; + io->ci_invalidate_page_cache = 1; io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() + 5 * obd_timeout / 16; result = cl_io_init(env, io, CIT_MISC, io->ci_obj); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index fc98d5a..f76a5fa 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -30011,6 +30011,54 @@ test_832() { } run_test 832 "lfs rm_entry" +test_833() { + local file=$DIR/$tfile + + stack_trap "rm -f $file" EXIT + dd if=/dev/zero of=$file bs=1M count=50 || error "Write $file failed" + + local wpid + local rpid + local rpid2 + + # Buffered I/O write + ( + while [ ! -e $DIR/sanity.833.lck ]; do + dd if=/dev/zero of=$file bs=1M count=50 conv=notrunc || + error "failed to write $file" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + wpid=$! + + # Buffered I/O read + ( + while [ ! -e $DIR/sanity.833.lck ]; do + dd if=$file of=/dev/null bs=1M count=50 || + error "failed to read $file" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + rpid=$! + + # Direct I/O read + ( + while [ ! -e $DIR/sanity.833.lck ]; do + dd if=$file of=/dev/null bs=1M count=50 iflag=direct || + error "failed to read $file in direct I/O mode" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + rpid2=$! + + sleep 30 + touch $DIR/sanity.833.lck + wait $wpid || error "$?: buffered write failed" + wait $rpid || error "$?: buffered read failed" + wait $rpid2 || error "$?: direct read failed" +} +run_test 833 "Mixed buffered/direct read and write should not return -EIO" + # # tests that do cleanup/setup should be run at the end # -- 1.8.3.1