Whamcloud - gitweb
LU-17364 llite: don't use stale page. 50/53550/8
authorAlexey Lyashkov <alexey.lyashkov@hpe.com>
Mon, 25 Dec 2023 11:52:35 +0000 (14:52 +0300)
committerOleg Drokin <green@whamcloud.com>
Tue, 23 Jan 2024 05:40:50 +0000 (05:40 +0000)
Using a stale page for a write might confuse the read path,
which expects every IO page to have the PG_uptodate flag set,
and this caused a panic while removing the page from the IO.

Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Change-Id: Ia01129ceaecf53d8d9f301c26cd2d65122f6a267
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53550
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/llite/vvp_io.c
lustre/obdclass/cl_page.c
lustre/tests/sanityn.sh

index 8e659bf..d07fefc 100644 (file)
@@ -1518,7 +1518,17 @@ static int vvp_io_fault_start(const struct lu_env *env,
        /* must return locked page */
        if (fio->ft_mkwrite) {
                LASSERT(cfio->ft_vmpage != NULL);
-               lock_page(cfio->ft_vmpage);
+               vmpage = cfio->ft_vmpage;
+               lock_page(vmpage);
+               /**
+                * page was truncated and lock was cancelled, return ENODATA
+                * so that VM_FAULT_NOPAGE will be returned to handle_mm_fault()
+                * XXX: cannot return VM_FAULT_RETRY to vfs since we cannot
+                * release mmap_lock and VM_FAULT_RETRY implies that the
+                * mmap_lock is released.
+                */
+               if (!PageUptodate(vmpage))
+                       GOTO(out, result = -ENODATA);
        } else {
                result = vvp_io_kernel_fault(cfio);
                if (result != 0)
index 44c364d..02cc27b 100644 (file)
@@ -1000,6 +1000,7 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *cp,
                GOTO(out, rc = 0);
 
        lock_page(vmpage);
+       PASSERT(env, cp, PageUptodate(vmpage));
        unlock = true;
 
        if (clear_page_dirty_for_io(vmpage)) {
index 27fc369..3ca4333 100755 (executable)
@@ -827,6 +827,38 @@ test_16j()
 }
 run_test 16j "race dio with buffered i/o"
 
+test_16k() {
+       local fsxN=${FSX_NP:-5}
+       local fsxNops=${FSX_NOPS:-10000}
+       local fsxNparams=${FSXPARAMS_16k:-""}
+       local dropsleep=${DROP_SLEEP:-3}
+       local dpipd
+       local -a fsxpids
+       local cmd
+
+       [ "$SLOW" = "no" ] && fsxNops=1000
+
+       while true; do
+               echo 3 > /proc/sys/vm/drop_caches
+               sleep $dropsleep
+       done &
+       dpipd=$!
+       stack_trap "kill -9 $dpipd"
+
+       for ((i = 1; i <= fsxN; i++)); do
+               cmd="$FSX $fsxNparams -N $fsxNops $DIR/fsxfile.${i} -l $((64 * 1024 * 1024))"
+               echo "+ $cmd"
+               eval $cmd &
+               fsxpids[$i]=$!
+       done
+       for ((i = 1; i <= fsxN; i++)); do
+               wait ${fsxpids[$i]} && echo "+ fsx $i done: rc=$?" ||
+                       error "- fsx $i FAILURE! rc=$?"
+               date
+       done
+}
+run_test 16k "Parallel FSX and drop caches should not panic"
+
 test_17() { # bug 3513, 3667
        remote_ost_nodsh && skip "remote OST with nodsh" && return