From 35fb413683affe9d41f3521259c260de6caf81d9 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Fri, 26 Jan 2024 18:06:50 +0800 Subject: [PATCH 1/1] LU-17482 llite: short read could mess up next read offset When read reaches EOF, it could read data from stale pagecache, but we need to restore the iocb->ki_pos so that next read could continue from the correct offset. Fixes: 4468f6c9d9 ("LU-16025 llite: adjust read count as file got truncated") Signed-off-by: Bobi Jam Change-Id: Ib8b62c41bf65f8efec82dda53fcfbdb68ad08b38 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53827 Reviewed-by: Andreas Dilger Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin Reviewed-by: Patrick Farrell Tested-by: jenkins Tested-by: Maloo --- lustre/include/obd_support.h | 1 + lustre/llite/file.c | 4 ++++ lustre/tests/multiop.c | 8 ++++++-- lustre/tests/sanity.sh | 26 ++++++++++++++++++++++++-- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 8036c6a..d7579eb 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -637,6 +637,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_LOV_COMP_PATTERN 0x1427 #define OBD_FAIL_LOV_INVALID_OSTIDX 0x1428 #define OBD_FAIL_LLITE_DELAY_TRUNCATE 0x1430 +#define OBD_FAIL_LLITE_READ_PAUSE 0x1431 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index d298cbf..bd2648f 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2176,6 +2176,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct lu_env *env; struct vvp_io_args *args; struct file *file = iocb->ki_filp; + loff_t orig_ki_pos = iocb->ki_pos; ssize_t result; ssize_t rc2; __u16 refcheck; @@ -2204,6 +2205,8 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) else if (result > 0) stale_data = true; + CFS_FAIL_TIMEOUT_ORSET(OBD_FAIL_LLITE_READ_PAUSE, CFS_FAIL_ONCE, + cfs_fail_val); /** * Currently when PCC read failed, we do not fall back to the * normal read path, just return the error. @@ -2253,6 +2256,7 @@ out: * we've reached EOF before the read, the data read are cached * stale data. */ + iocb->ki_pos = orig_ki_pos; iov_iter_truncate(to, 0); result = 0; } diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 48886ac..69da675 100644 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -638,6 +638,9 @@ int main(int argc, char **argv) } while (len > 0) { + off_t start, off; + + start = lseek(fd, 0, SEEK_CUR); rc = read(fd, buf_align, len); if (rc == -1) { save_errno = errno; @@ -645,8 +648,9 @@ int main(int argc, char **argv) exit(save_errno); } if (rc < len) { - fprintf(stderr, "short read: %lld/%u\n", - rc, len); + off = lseek(fd, 0, SEEK_CUR); + fprintf(stderr, "short read: %ld ->+ %u -> %ld %lld\n", + start, len, off, rc); if (rc == 0) break; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 7073096..75b4347 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -23867,7 +23867,7 @@ test_250() { } run_test 250 "Write above 16T limit" -test_251() { +test_251a() { $LFS setstripe -c -1 -S 1048576 $DIR/$tfile #define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407 @@ -23882,7 +23882,29 @@ test_251() { rm -f $DIR/$tfile } -run_test 251 "Handling short read and write correctly" +run_test 251a "Handling short read and write correctly" + +test_251b() { + dd if=/dev/zero of=$DIR/$tfile bs=1k count=4 || + error "write $tfile failed" + + sleep 2 && echo 12345 >> $DIR/$tfile & + + #define OBD_FAIL_LLITE_READ_PAUSE 0x1431 + $LCTL set_param fail_loc=0x1431 fail_val=5 + # seek to 4096, 2 seconds later, file size expand to 4102, and after + # 5 seconds, read 10 bytes, the short read should + # report: + # start ->+ read_len -> offset_after_read read_count + # short read: 4096 ->+ 10 -> 4096 0 + # not: + # short read: 4096 ->+ 10 -> 4102 0 + local off=$($MULTIOP $DIR/$tfile oO_RDONLY:z4096r10c 2>&1 | \ + awk '/short read/ { print $7 }') + (( off == 4096 )) || + error "short read should set offset at 4096, not $off" +} +run_test 251b "short read restore offset correctly" test_252() { remote_mds_nodsh && skip "remote MDS with nodsh" -- 1.8.3.1