From c0a7f78529e21c9cafa986abea255925b4b41244 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 28 May 2021 22:42:49 -0400 Subject: [PATCH] LU-14711 tests: Ensure there's no eviction with long cache discard Just pause execution while doing page processing for discard if appropriate failloc is set. Change-Id: If0d04f3cad267cbeeab63040d63e048dcf03cd6b Signed-off-by: Oleg Drokin Test-Parameters: trivial testlist=sanity env=ONLY=903 Reviewed-on: https://review.whamcloud.com/43869 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: James Nunez --- lustre/include/obd_support.h | 1 + lustre/osc/osc_cache.c | 3 +++ lustre/tests/sanity.sh | 15 +++++++++++++++ lustre/tests/test-framework.sh | 2 +- 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 5852bf2..83e9f09 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -425,6 +425,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_DELAY_IO 0x414 #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 #define OBD_FAIL_OSC_DELAY_CANCEL 0x416 +#define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index cfcdf79..3255696 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -3119,6 +3119,9 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, if (!res) break; + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SLOW_PAGE_EVICT, + cfs_fail_val ?: 20); + if (io->ci_type == CIT_MISC && io->u.ci_misc.lm_next_rpc_time && ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 45b4494..49afa47 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -27238,6 +27238,21 @@ test_902() { } run_test 902 "test short write doesn't hang lustre" +# LU-14711 +test_903() { + $LFS setstripe -i 0 -c 1 $DIR/$tfile $DIR/${tfile}-2 + echo "blah" > $DIR/${tfile}-2 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=6 conv=fsync + #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 + $LCTL set_param fail_loc=0x417 fail_val=20 + + mv $DIR/${tfile}-2 $DIR/$tfile # Destroys the big object + sleep 1 # To start the destroy + wait_destroy_complete 150 || error "Destroy taking too long" + cat $DIR/$tfile > /dev/null || error "Evicted" +} +run_test 903 "Test long page discard does not cause evictions" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index d6b5ce3..f10815b 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3438,7 +3438,7 @@ wait_destroy_complete () { # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do -- 1.8.3.1