From b33808d3aebb06cf0660baee1186b2f944d9d06d Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 23 Nov 2022 08:03:41 -0500 Subject: [PATCH] LU-16338 readahead: clip readahead with kms During I/O testing, it was found that the read-ahead pages reach 255 for small files of only several KiB. The amount of read data reaches more than 1MiB. The reason is that the granted DLM extent lock is [0, EOF], which is larger than the requested extent. During readahead, the OSC layer will also return the [0, EOF] extent, which is clipped to the stripe size (1MiB) regardless of the actual object size. In this patch, the readahead range is clipped to the known min size (kms) on the OSC layer during readahead. This way, the read-ahead data will not go beyond the last page of the file. Add sanity/101m to verify it. This patch also fixes multiop to return successfully when reaching EOF instead of exiting with ENODATA during read. Test-Parameters: testlist=sanity env=ONLY=101k,ONLY_REPEAT=3 Signed-off-by: Qian Yingjin Change-Id: I285e3e1d84ad06231039306106c74d775c1b0b50 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49226 Reviewed-by: Andreas Dilger Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/osc/osc_io.c | 5 ++++ lustre/tests/multiop.c | 2 +- lustre/tests/sanity.sh | 62 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 524ab9b..90437ed 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -85,6 +85,8 @@ static int osc_io_read_ahead(const struct lu_env *env, oio->oi_is_readahead = true; dlmlock = osc_dlmlock_at_pgoff(env, osc, start, 0); if (dlmlock != NULL) { + struct lov_oinfo *oinfo = osc->oo_oinfo; + LASSERT(dlmlock->l_ast_data == osc); if (dlmlock->l_req_mode != LCK_PR) { struct lustre_handle lockh; @@ -101,6 +103,9 @@ static int osc_io_read_ahead(const struct lu_env *env, ra->cra_oio = oio; if 
(ra->cra_end_idx != CL_PAGE_EOF) ra->cra_contention = true; + ra->cra_end_idx = min_t(pgoff_t, ra->cra_end_idx, + cl_index(osc2cl(osc), + oinfo->loi_kms - 1)); result = 0; } diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index e206785..5241027 100644 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -614,7 +614,7 @@ int main(int argc, char **argv) fprintf(stderr, "short read: %lld/%u\n", rc, len); if (rc == 0) - exit(ENODATA); + break; } len -= rc; if (verbose >= 2) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index ff80e57..bd32050 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11393,19 +11393,65 @@ test_101j() { } run_test 101j "A complete read block should be submitted when no RA" -test_101k() -{ +test_readahead_base() { local file=$DIR/$tfile - - check_set_fallocate_or_skip + local size=$1 + local iosz + local ramax + local ranum $LCTL set_param -n llite.*.read_ahead_stats=0 - fallocate -l 16K $file || error "failed to fallocate $file" + # The first page is not accounted into readahead + ramax=$(((size + PAGE_SIZE - 1) / PAGE_SIZE - 1)) + iosz=$(((size + 1048575) / 1048576 * 1048576)) + echo "Test readahead: size=$size ramax=$ramax iosz=$iosz" + + $LCTL mark "Test readahead: size=$size ramax=$ramax iosz=$iosz" + fallocate -l $size $file || error "failed to fallocate $file" cancel_lru_locks osc - $MULTIOP $file or1048576c - $LCTL get_param llite.*.read_ahead_stats + $MULTIOP $file or${iosz}c || error "failed to read $file" + $LCTL get_param -n llite.*.read_ahead_stats + ranum=$($LCTL get_param -n llite.*.read_ahead_stats | + awk '/readahead.pages/ { print $7 }' | calc_total) + (( $ranum <= $ramax )) || + error "read-ahead pages is $ranum more than $ramax" + rm -rf $file || error "failed to remove $file" } -run_test 101k "read ahead for small file" + +test_101m() +{ + local file=$DIR/$tfile + local ramax + local ranum + local size + local iosz + + check_set_fallocate_or_skip + stack_trap "rm -f $file" 
EXIT + + test_readahead_base 4096 + + # file size: 16K = 16384 + test_readahead_base 16384 + test_readahead_base 16385 + test_readahead_base 16383 + + # file size: 1M + 1 = 1048576 + 1 + test_readahead_base 1048577 + # file size: 1M + 16K + test_readahead_base $((1048576 + 16384)) + + # file size: stripe_size * (stripe_count - 1) + 16K + $LFS setstripe -c -1 $file || error "failed to setstripe $file" + test_readahead_base $((1048576 * (OSTCOUNT - 1) + 16384)) + # file size: stripe_size * stripe_count + 16K + $LFS setstripe -c -1 $file || error "failed to setstripe $file" + test_readahead_base $((1048576 * OSTCOUNT + 16384)) + # file size: 2 * stripe_size * stripe_count + 16K + $LFS setstripe -c -1 $file || error "failed to setstripe $file" + test_readahead_base $((2 * 1048576 * OSTCOUNT + 16384)) +} +run_test 101m "read ahead for small file and last stripe of the file" setup_test102() { test_mkdir $DIR/$tdir -- 1.8.3.1