From dad3bed7617fba895db169facde91856e89c2b08 Mon Sep 17 00:00:00 2001 From: Alexey Lyashkov Date: Mon, 25 Dec 2023 14:52:35 +0300 Subject: [PATCH] LU-17364 llite: don't use stale page. Using a stale page for write might confuse the read path, which expects any IO page to have the PG_uptodate flag set, and this caused a panic when removing the page from IO. Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Test-Parameters: testlist=sanityn env=SLOW=yes,ONLY=16k,ONLY_REPEAT=10 Signed-off-by: Alexey Lyashkov Change-Id: Ia01129ceaecf53d8d9f301c26cd2d65122f6a267 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53550 Reviewed-by: Patrick Farrell Reviewed-by: Alexander Zarochentsev Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/llite/vvp_io.c | 12 +++++++++++- lustre/obdclass/cl_page.c | 1 + lustre/tests/sanityn.sh | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 8e659bf..d07fefc 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1518,7 +1518,17 @@ static int vvp_io_fault_start(const struct lu_env *env, /* must return locked page */ if (fio->ft_mkwrite) { LASSERT(cfio->ft_vmpage != NULL); - lock_page(cfio->ft_vmpage); + vmpage = cfio->ft_vmpage; + lock_page(vmpage); + /** + * page was truncated and lock was cancelled, return ENODATA + * so that VM_FAULT_NOPAGE will be returned to handle_mm_fault() + * XXX: cannot return VM_FAULT_RETRY to vfs since we cannot + * release mmap_lock and VM_FAULT_RETRY implies that the + * mmap_lock is released. 
+ */ + if (!PageUptodate(vmpage)) + GOTO(out, result = -ENODATA); } else { result = vvp_io_kernel_fault(cfio); if (result != 0) diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 44c364d..02cc27b 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -1000,6 +1000,7 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *cp, GOTO(out, rc = 0); lock_page(vmpage); + PASSERT(env, cp, PageUptodate(vmpage)); unlock = true; if (clear_page_dirty_for_io(vmpage)) { diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 27fc369..3ca4333 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -827,6 +827,38 @@ test_16j() } run_test 16j "race dio with buffered i/o" +test_16k() { + local fsxN=${FSX_NP:-5} + local fsxNops=${FSX_NOPS:-10000} + local fsxNparams=${FSXPARAMS_16k:-""} + local dropsleep=${DROP_SLEEP:-3} + local dpipd + local -a fsxpids + local cmd + + [ "$SLOW" = "no" ] && fsxNops=1000 + + while true; do + echo 3 > /proc/sys/vm/drop_caches + sleep $dropsleep + done & + dpipd=$! + stack_trap "kill -9 $dpipd" + + for ((i = 1; i <= fsxN; i++)); do + cmd="$FSX $fsxNparams -N $fsxNops $DIR/fsxfile.${i} -l $((64 * 1024 * 1024))" + echo "+ $cmd" + eval $cmd & + fsxpids[$i]=$! + done + for ((i = 1; i <= fsxN; i++)); do + wait ${fsxpids[$i]} && echo "+ fsx $i done: rc=$?" || + error "- fsx $i FAILURE! rc=$?" + date + done +} +run_test 16k "Parallel FSX and drop caches should not panic" + test_17() { # bug 3513, 3667 remote_ost_nodsh && skip "remote OST with nodsh" && return -- 1.8.3.1