From 5109b9d90e2fa77c034d0b9232a0a18e5afe70ff Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 28 May 2021 22:42:49 -0400 Subject: [PATCH] LU-14711 tests: Ensure no eviction with long cache discard Origin title: LU-14711 tests: Ensure there's no eviction with long cache discard Pause execution in osc_page_gang_lookup() while processing pages for discard when the OBD_FAIL_OSC_SLOW_PAGE_EVICT (0x417) fail_loc is set. Lustre-change: https://review.whamcloud.com/43869 Lustre-commit: TBD (from 3323b40668cddaa1ac6f6644436bd305c189c5ac) Change-Id: If0d04f3cad267cbeeab63040d63e048dcf03cd6b Signed-off-by: Oleg Drokin Test-Parameters: trivial testlist=sanity env=ONLY=903 Reviewed-on: https://review.whamcloud.com/44286 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 1 + lustre/osc/osc_cache.c | 3 +++ lustre/tests/sanity.sh | 15 +++++++++++++++ lustre/tests/test-framework.sh | 2 +- 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 83b1e7c..fd48c28 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -420,6 +420,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_DELAY_IO 0x414 #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 #define OBD_FAIL_OSC_DELAY_CANCEL 0x416 +#define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index c3db5f1..e472f1f 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -3126,6 +3126,9 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, if (!res) break; + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SLOW_PAGE_EVICT, + cfs_fail_val ?: 20); + if (io->ci_type == CIT_MISC && io->u.ci_misc.lm_next_rpc_time && ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 1b7506b..a4b7524 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -25614,6 
+25614,21 @@ test_902() { } run_test 902 "test short write doesn't hang lustre" +# LU-14711 +test_903() { + $LFS setstripe -i 0 -c 1 $DIR/$tfile $DIR/${tfile}-2 + echo "blah" > $DIR/${tfile}-2 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=6 conv=fsync + #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 + $LCTL set_param fail_loc=0x417 fail_val=20 + + mv $DIR/${tfile}-2 $DIR/$tfile # Destroys the big object + sleep 1 # To start the destroy + wait_destroy_complete 150 || error "Destroy taking too long" + cat $DIR/$tfile > /dev/null || error "Evicted" +} +run_test 903 "Test long page discard does not cause evictions" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index c222f3f..edb4e8d 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3454,7 +3454,7 @@ wait_destroy_complete () { # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do -- 1.8.3.1