LU-17364 llite: don't use stale page.

author Alexey Lyashkov <alexey.lyashkov@hpe.com>

Mon, 25 Dec 2023 11:52:35 +0000 (14:52 +0300)

committer Oleg Drokin <green@whamcloud.com>

Tue, 23 Jan 2024 05:40:50 +0000 (05:40 +0000)
author Alexey Lyashkov <alexey.lyashkov@hpe.com>
Mon, 25 Dec 2023 11:52:35 +0000 (14:52 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 23 Jan 2024 05:40:50 +0000 (05:40 +0000)
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c

index 8e659bf..d07fefc 100644 (file)
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -1518,7 +1518,17 @@ static int vvp_io_fault_start(const struct lu_env *env,
         /* must return locked page */
         if (fio->ft_mkwrite) {
                 LASSERT(cfio->ft_vmpage != NULL);
-               lock_page(cfio->ft_vmpage);
+               vmpage = cfio->ft_vmpage;
+               lock_page(vmpage);
+               /**
+                * page was truncated and lock was cancelled, return ENODATA
+                * so that VM_FAULT_NOPAGE will be returned to handle_mm_fault()
+                * XXX: cannot return VM_FAULT_RETRY to vfs since we cannot
+                * release mmap_lock and VM_FAULT_RETRY implies that the
+                * mmap_lock is released.
+                */
+               if (!PageUptodate(vmpage))
+                       GOTO(out, result = -ENODATA);
         } else {
                 result = vvp_io_kernel_fault(cfio);
                 if (result != 0)
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c

index 44c364d..02cc27b 100644 (file)
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -1000,6 +1000,7 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *cp,
                 GOTO(out, rc = 0);
  
         lock_page(vmpage);
+       PASSERT(env, cp, PageUptodate(vmpage));
         unlock = true;
  
         if (clear_page_dirty_for_io(vmpage)) {
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh

index 27fc369..3ca4333 100755 (executable)
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -827,6 +827,38 @@ test_16j()
  }
  run_test 16j "race dio with buffered i/o"
  
+test_16k() {
+       local fsxN=${FSX_NP:-5}
+       local fsxNops=${FSX_NOPS:-10000}
+       local fsxNparams=${FSXPARAMS_16k:-""}
+       local dropsleep=${DROP_SLEEP:-3}
+       local dpipd
+       local -a fsxpids
+       local cmd
+
+       [ "$SLOW" = "no" ] && fsxNops=1000
+
+       while true; do
+               echo 3 > /proc/sys/vm/drop_caches
+               sleep $dropsleep
+       done &
+       dpipd=$!
+       stack_trap "kill -9 $dpipd"
+
+       for ((i = 1; i <= fsxN; i++)); do
+               cmd="$FSX $fsxNparams -N $fsxNops $DIR/fsxfile.${i} -l $((64 * 1024 * 1024))"
+               echo "+ $cmd"
+               eval $cmd &
+               fsxpids[$i]=$!
+       done
+       for ((i = 1; i <= fsxN; i++)); do
+               wait ${fsxpids[$i]} && echo "+ fsx $i done: rc=$?" ||
+                       error "- fsx $i FAILURE! rc=$?"
+               date
+       done
+}
+run_test 16k "Parallel FSX and drop caches should not panic"
+
  test_17() { # bug 3513, 3667
         remote_ost_nodsh && skip "remote OST with nodsh" && return
author	Alexey Lyashkov <alexey.lyashkov@hpe.com>
	Mon, 25 Dec 2023 11:52:35 +0000 (14:52 +0300)
committer	Oleg Drokin <green@whamcloud.com>
	Tue, 23 Jan 2024 05:40:50 +0000 (05:40 +0000)
lustre/llite/vvp_io.c		patch \| blob \| history
lustre/obdclass/cl_page.c		patch \| blob \| history
lustre/tests/sanityn.sh		patch \| blob \| history