Whamcloud - gitweb
LU-16651 llite: hold invalidate_lock when invalidate cache pages 71/50371/4
authorQian Yingjin <qian@ddn.com>
Tue, 21 Mar 2023 08:53:00 +0000 (04:53 -0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 8 Jul 2023 22:34:42 +0000 (22:34 +0000)
The newer kernel (such as Ubuntu 2204) introduces a new member:
invalidate_lock in the structure @address_space.
The filesystem must exclusively acquire invalidate_lock before
invalidating page cache in truncate / hole punch (and thus calling
into ->invalidatepage) to block races between page cache
invalidation and page cache filling functions (fault, read, ...)

However, current Lustre client does not hold this lock when remove
pages from page cache caused by the revocation of the extent DLM
lock protecting them.
If a client has two overlapped PR DLM extent locks, i.e:
- L1 = <PR, [1M, 4M - 1]
- L2 = <PR, [3M, 5M - 1]
A reader process holds L1 and reads data in range [3M, 4M - 1].
L2 is being revoken due to the conflict access.
Then the page read-in by the reader may be invalidated and deleted
from page cache by the revocation of L2 (in lock blocking AST).

The older kernel will check each page after read whether it was
invalidated and deleted from page cache. If so, it will retry the
page read.

In the newer kernel, it removes this check and retry.
Instead, it introduces a new rw_semaphore in the address_space -
invalidate_lock - that holding the shared lock to protect adding
of pages to page cache for page faults / reads / readahead, and
the exclusive lock to protect invalidating pages, removing them
from page cache for truncate / hole punch.

Thus, in this patch it holds exclusive invalidate_lock in newer
kernels when remove pages from page cache caused by the revocation
of a extent DLM lock protecting them. Otherwsie, it will result in
-EIO error or partial reads in the new added test case sanity/833.

Test-parameters: clientdistro=ubuntu2204 testlist=sanity env=ONLY=833,ONLY_REPEAT=10
Change-Id: If3a27002b89636b9fd4d7b5ea50afa9aeac5d121
Signed-off-by: Qian Yingjin <qian@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50371
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/include/cl_object.h
lustre/llite/vvp_io.c
lustre/osc/osc_cache.c
lustre/tests/sanity.sh

index 31f123b..f1f8fb7 100644 (file)
@@ -3193,6 +3193,29 @@ AC_DEFUN([LC_HAVE_GET_ACL_RCU_ARG], [
 ]) # LC_HAVE_GET_ACL_RCU_ARG
 
 #
+# LC_HAVE_INVALIDATE_LOCK
+#
+# Kernel version v5.15-rc1 commit 730633f0b7f951726e87f912a6323641f674ae34
+# mm: Protect operations adding pages to page cache with invalidate_lock
+#
+AC_DEFUN([LC_HAVE_INVALIDATE_LOCK], [
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_CHECK_COMPILE([if have address_space have invalidate_lock member],
+address_space_invalidate_lock, [
+               #include <linux/fs.h>
+       ],[
+               struct address_space *mapping = NULL;
+
+               filemap_invalidate_lock(mapping);
+       ],[
+               AC_DEFINE(HAVE_INVALIDATE_LOCK, 1,
+                       [address_space invalidate_lock member exists])
+       ])
+EXTRA_KCFLAGS="$tmp_flags"
+]) # LC_HAVE_INVALIDATE_LOCK
+
+#
 # LC_HAVE_SECURITY_DENTRY_INIT_WITH_XATTR_NAME_ARG
 #
 # Linux v5.15-rc1-20-g15bf32398ad4
@@ -4258,6 +4281,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [
 
        # 5.15
        LC_HAVE_GET_ACL_RCU_ARG
+       LC_HAVE_INVALIDATE_LOCK
 
        # 5.16
        LC_HAVE_SECURITY_DENTRY_INIT_WITH_XATTR_NAME_ARG
index cad4702..3c4534e 100644 (file)
@@ -1925,7 +1925,16 @@ struct cl_io {
        /**
         * io_uring direct IO with flags IOCB_NOWAIT.
         */
-                            ci_iocb_nowait:1;
+                            ci_iocb_nowait:1,
+       /**
+        * The filesystem must exclusively acquire invalidate_lock before
+        * invalidating page cache in truncate / hole punch / DLM extent
+        * lock blocking AST path (and thus calling into ->invalidatepage)
+        * to block races between page cache invalidation and page cache
+        * filling functions (fault, read, ...)
+        */
+                            ci_invalidate_page_cache:1;
+
        /**
         * How many times the read has retried before this one.
         * Set by the top level and consumed by the LOV.
index edc821d..a43bf5e 100644 (file)
@@ -303,11 +303,18 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
        CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
        CDEBUG(D_VFSTRACE, DFID" ignore/verify layout %d/%d, layout version %d "
-                          "need write layout %d, restore needed %d\n",
+              "need write layout %d, restore needed %d, invalidate_lock %d\n",
               PFID(lu_object_fid(&obj->co_lu)),
               io->ci_ignore_layout, io->ci_verify_layout,
               vio->vui_layout_gen, io->ci_need_write_intent,
-              io->ci_restore_needed);
+              io->ci_restore_needed, io->ci_invalidate_page_cache);
+
+#ifdef HAVE_INVALIDATE_LOCK
+       if (io->ci_invalidate_page_cache) {
+               filemap_invalidate_unlock(inode->i_mapping);
+               io->ci_invalidate_page_cache = 0;
+       }
+#endif /* HAVE_INVALIDATE_LOCK */
 
        if (io->ci_restore_needed) {
                /* file was detected release, we need to restore it
@@ -1872,6 +1879,11 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
                               PFID(lu_object_fid(&obj->co_lu)), result);
        }
 
+#ifdef HAVE_INVALIDATE_LOCK
+       if (io->ci_invalidate_page_cache)
+               filemap_invalidate_lock(inode->i_mapping);
+#endif /* HAVE_INVALIDATE_LOCK */
+
        io->ci_result = result < 0 ? result : 0;
        RETURN(result);
 }
index 65ad501..746f028 100644 (file)
@@ -3265,6 +3265,7 @@ int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
 
        io->ci_obj = cl_object_top(osc2cl(osc));
        io->ci_ignore_layout = 1;
+       io->ci_invalidate_page_cache = 1;
        io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() +
                                         5 * obd_timeout / 16;
        result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
index fc98d5a..f76a5fa 100755 (executable)
@@ -30011,6 +30011,54 @@ test_832() {
 }
 run_test 832 "lfs rm_entry"
 
+test_833() {
+       local file=$DIR/$tfile
+
+       stack_trap "rm -f $file" EXIT
+       dd if=/dev/zero of=$file bs=1M count=50 || error "Write $file failed"
+
+       local wpid
+       local rpid
+       local rpid2
+
+       # Buffered I/O write
+       (
+               while [ ! -e $DIR/sanity.833.lck ]; do
+                       dd if=/dev/zero of=$file bs=1M count=50 conv=notrunc ||
+                               error "failed to write $file"
+                       sleep 0.$((RANDOM % 4 + 1))
+               done
+       )&
+       wpid=$!
+
+       # Buffered I/O read
+       (
+               while [ ! -e $DIR/sanity.833.lck ]; do
+                       dd if=$file of=/dev/null bs=1M count=50 ||
+                               error "failed to read $file"
+                       sleep 0.$((RANDOM % 4 + 1))
+               done
+       )&
+       rpid=$!
+
+       # Direct I/O read
+       (
+               while [ ! -e $DIR/sanity.833.lck ]; do
+                       dd if=$file of=/dev/null bs=1M count=50 iflag=direct ||
+                               error "failed to read $file in direct I/O mode"
+                       sleep 0.$((RANDOM % 4 + 1))
+               done
+       )&
+       rpid2=$!
+
+       sleep 30
+       touch $DIR/sanity.833.lck
+       wait $wpid || error "$?: buffered write failed"
+       wait $rpid || error "$?: buffered read failed"
+       wait $rpid2 || error "$?: direct read failed"
+}
+run_test 833 "Mixed buffered/direct read and write should not return -EIO"
+
 #
 # tests that do cleanup/setup should be run at the end
 #