Whamcloud - gitweb
LU-14711 tests: Ensure there's no eviction with long cache discard 69/43869/9
author Oleg Drokin <green@whamcloud.com>
Sat, 29 May 2021 02:42:49 +0000 (22:42 -0400)
committer Oleg Drokin <green@whamcloud.com>
Fri, 17 Sep 2021 14:06:21 +0000 (14:06 +0000)
Pause execution while processing pages for discard if the
appropriate fail_loc (OBD_FAIL_OSC_SLOW_PAGE_EVICT, 0x417) is set.

Change-Id: If0d04f3cad267cbeeab63040d63e048dcf03cd6b
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Test-Parameters: trivial testlist=sanity env=ONLY=903
Reviewed-on: https://review.whamcloud.com/43869
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Nunez <jnunez@whamcloud.com>
lustre/include/obd_support.h
lustre/osc/osc_cache.c
lustre/tests/sanity.sh
lustre/tests/test-framework.sh

index 5852bf2..83e9f09 100644 (file)
@@ -425,6 +425,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSC_DELAY_IO            0x414
 #define OBD_FAIL_OSC_NO_SIZE_DATA        0x415
 #define OBD_FAIL_OSC_DELAY_CANCEL        0x416
+#define OBD_FAIL_OSC_SLOW_PAGE_EVICT    0x417
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
index cfcdf79..3255696 100644 (file)
@@ -3119,6 +3119,9 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
                if (!res)
                        break;
 
+               OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SLOW_PAGE_EVICT,
+                                cfs_fail_val ?: 20);
+
                if (io->ci_type == CIT_MISC &&
                    io->u.ci_misc.lm_next_rpc_time &&
                    ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) {
index 45b4494..49afa47 100755 (executable)
@@ -27238,6 +27238,21 @@ test_902() {
 }
 run_test 902 "test short write doesn't hang lustre"
 
+# LU-14711
+test_903() {
+       $LFS setstripe -i 0 -c 1 $DIR/$tfile $DIR/${tfile}-2
+       echo "blah" > $DIR/${tfile}-2
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=6 conv=fsync
+       #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417
+       $LCTL set_param fail_loc=0x417 fail_val=20
+
+       mv $DIR/${tfile}-2 $DIR/$tfile # Destroys the big object
+       sleep 1 # To start the destroy
+       wait_destroy_complete 150 || error "Destroy taking too long"
+       cat $DIR/$tfile > /dev/null || error "Evicted"
+}
+run_test 903 "Test long page discard does not cause evictions"
+
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre
index d6b5ce3..f10815b 100755 (executable)
@@ -3438,7 +3438,7 @@ wait_destroy_complete () {
        # MAX value shouldn't be big as this mean server responsiveness
        # never increase this just to make test pass but investigate
        # why it takes so long time
-       local MAX=5
+       local MAX=${1:-5}
        local WAIT=0
        local list=$(comma_list $(mdts_nodes))
        while [ $WAIT -lt $MAX ]; do