LU-17364 llite: don't use stale page.

author Alexey Lyashkov <alexey.lyashkov@hpe.com>

Fri, 12 Jan 2024 18:55:55 +0000 (13:55 -0500)

committer Andreas Dilger <adilger@whamcloud.com>

Thu, 18 Jan 2024 09:24:36 +0000 (09:24 +0000)
author Alexey Lyashkov <alexey.lyashkov@hpe.com>
Fri, 12 Jan 2024 18:55:55 +0000 (13:55 -0500)
committer Andreas Dilger <adilger@whamcloud.com>
Thu, 18 Jan 2024 09:24:36 +0000 (09:24 +0000)
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c

index ed543b0..94436cb 100644 (file)
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -1564,7 +1564,17 @@ static int vvp_io_fault_start(const struct lu_env *env,
         /* must return locked page */
         if (fio->ft_mkwrite) {
                 LASSERT(cfio->ft_vmpage != NULL);
-               lock_page(cfio->ft_vmpage);
+               vmpage = cfio->ft_vmpage;
+               lock_page(vmpage);
+               /**
+                * page was truncated and lock was cancelled, return ENODATA
+                * so that VM_FAULT_NOPAGE will be returned to handle_mm_fault()
+                * XXX: cannot return VM_FAULT_RETRY to vfs since we cannot
+                * release mmap_lock and VM_FAULT_RETRY implies that the
+                * mmap_lock is released.
+                */
+               if (!PageUptodate(vmpage))
+                       GOTO(out, result = -ENODATA);
         } else {
                 result = vvp_io_kernel_fault(cfio);
                 if (result != 0)
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c

index 85319f6..b36ec85 100644 (file)
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -1087,6 +1087,7 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *cl_page,
                 RETURN(-EINVAL);
  
         lock_page(vmpage);
+       PASSERT(env, cl_page, PageUptodate(vmpage));
         cl_page_slice_for_each(cl_page, slice, i) {
                 if (slice->cpl_ops->io[crt].cpo_make_ready != NULL)
                         result = (*slice->cpl_ops->io[crt].cpo_make_ready)(env, slice);
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh

index 8c74126..9091c52 100755 (executable)
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -698,6 +698,38 @@ test_16i() {
  }
  run_test 16i "read after truncate file"
  
+test_16k() { # run several fsx instances while a background loop drops caches
+       local fsxN=${FSX_NP:-5} # number of fsx processes to start
+       local fsxNops=${FSX_NOPS:-10000} # value passed to fsx via -N
+       local fsxNparams=${FSXPARAMS_16k:-""} # extra fsx arguments, if any
+       local dropsleep=${DROP_SLEEP:-3} # pause between drop_caches writes
+       local dpipd # PID of the background drop_caches loop
+       local -a fsxpids # PIDs of the background fsx commands, indexed 1..fsxN
+       local cmd
+
+       [ "$SLOW" = "no" ] && fsxNops=1000 # SLOW=no overrides FSX_NOPS as well
+
+       while true; do # endless loop; only stops when killed via $dpipd
+               echo 3 > /proc/sys/vm/drop_caches
+               sleep $dropsleep
+       done &
+       dpipd=$!
+       stack_trap "kill -9 $dpipd" # presumably runs at test cleanup; helper defined elsewhere
+
+       for ((i = 1; i <= fsxN; i++)); do # one fsx per file, all started in background
+               cmd="$FSX $fsxNparams -N $fsxNops $DIR/fsxfile.${i} -l $((64 * 1024 * 1024))"
+               echo "+ $cmd"
+               eval $cmd &
+               fsxpids[$i]=$!
+       done
+       for ((i = 1; i <= fsxN; i++)); do # reap each fsx; non-zero exit calls error
+               wait ${fsxpids[$i]} && echo "+ fsx $i done: rc=$?" ||
+                       error "- fsx $i FAILURE! rc=$?"
+               date
+       done
+}
+run_test 16k "Parallel FSX and drop caches should not panic"
+
  test_17() { # bug 3513, 3667
         remote_ost_nodsh && skip "remote OST with nodsh" && return
author	Alexey Lyashkov <alexey.lyashkov@hpe.com>
	Fri, 12 Jan 2024 18:55:55 +0000 (13:55 -0500)
committer	Andreas Dilger <adilger@whamcloud.com>
	Thu, 18 Jan 2024 09:24:36 +0000 (09:24 +0000)
lustre/llite/vvp_io.c		patch \| blob \| history
lustre/obdclass/cl_page.c		patch \| blob \| history
lustre/tests/sanityn.sh		patch \| blob \| history