We must clear the 'uptodate' page flag when we delete a
page from Lustre, or stale reads can occur. However,
generic_file_buffered_read requires any pages returned from
readpage() be uptodate.
So, we must retry reading if page truncation happens in
parallel with the read.
This implements the same fix as:
https://review.whamcloud.com/49647
b4da788a819f82d35b685d6ee7f02809c05ca005
did for the mmap path.
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Iae0d1eb343f25a0176135347e54c309056c2613a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50344
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#define OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE 0x1421
#define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422
#define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423
+#define OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424
#define OBD_FAIL_FID_INDIR 0x1501
#define OBD_FAIL_FID_INLMA 0x1502
result = generic_file_read_iter(iocb, iter);
/* If the first page is not in cache, generic_file_aio_read() will be
- * returned with -ENODATA.
- * See corresponding code in ll_readpage(). */
- if (result == -ENODATA)
+ * returned with -ENODATA. Fall back to full read path.
+ * See corresponding code in ll_readpage().
+ *
+ * if we raced with page deletion, we might get EIO. Rather than add
+ * locking to the fast path for this rare case, fall back to the full
+ * read path. (See vvp_io_read_start() for rest of handling.
+ */
+ if (result == -ENODATA || result == -EIO)
result = 0;
if (result > 0) {
if (ra.cra_release != NULL)
cl_read_ahead_release(env, &ra);
+ /* this delay gives time for the actual read of the page to finish and
+ * unlock the page in vvp_page_completion_read before we return to our
+ * caller and the caller tries to use the page, allowing us to test
+ * races with the page being unlocked after readpage() but before it's
+ * used by the caller
+ */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_READPAGE_PAUSE2, cfs_fail_val);
+
RETURN(result);
}
struct lu_env *env;
int result = 0;
+ ENTRY;
+
LASSERT(PageLocked(vmpage));
if (PageWriteback(vmpage) || PageDirty(vmpage))
- return 0;
+ RETURN(0);
mapping = vmpage->mapping;
if (mapping == NULL)
- return 1;
+ RETURN(1);
obj = ll_i2info(mapping->host)->lli_clob;
if (obj == NULL)
- return 1;
+ RETURN(1);
page = cl_vmpage_page(vmpage, obj);
if (page == NULL)
- return 1;
+ RETURN(1);
env = cl_env_percpu_get();
LASSERT(!IS_ERR(env));
cl_page_put(env, page);
cl_env_percpu_put(env);
- return result;
+ RETURN(result);
}
#ifdef HAVE_AOPS_RELEASE_FOLIO
size_t cnt = io->u.ci_rd.rd.crw_count;
size_t tot = vio->vui_tot_count;
struct ll_cl_context *lcc;
+ unsigned int seq;
int exceed = 0;
int result;
+ int total_bytes_read = 0;
struct iov_iter iter;
pgoff_t page_offset;
lcc->lcc_end_index = DIV_ROUND_UP(pos + iter.count, PAGE_SIZE);
CDEBUG(D_VFSTRACE, "count:%ld iocb pos:%lld\n", iter.count, pos);
- result = generic_file_read_iter(vio->vui_iocb, &iter);
+ /* this seqlock lets us notice if a page has been deleted on this inode
+ * during the fault process, allowing us to catch an erroneous short
+ * read or EIO
+ * See LU-16160
+ */
+ do {
+ seq = read_seqbegin(&ll_i2info(inode)->lli_page_inv_lock);
+ result = generic_file_read_iter(vio->vui_iocb, &iter);
+ if (result >= 0) {
+ io->ci_nob += result;
+ total_bytes_read += result;
+ }
+ /* if we got a short read or -EIO and we raced with page invalidation,
+ * retry
+ */
+ } while (read_seqretry(&ll_i2info(inode)->lli_page_inv_lock, seq) &&
+ ((result >= 0 && iov_iter_count(&iter) > 0)
+ || result == -EIO));
out:
if (result >= 0) {
- if (result < cnt)
+ if (total_bytes_read < cnt)
io->ci_continue = 0;
- io->ci_nob += result;
result = 0;
} else if (result == -EIOCBQUEUED) {
io->ci_nob += vio->u.readwrite.vui_read;
THETESTS += create_foreign_dir parse_foreign_dir
THETESTS += check_fallocate splice-test lseek_test expand_truncate_test
THETESTS += foreign_symlink_striping lov_getstripe_old io_uring_probe
+THETESTS += fadvise_dontneed_helper
if LIBAIO
THETESTS += aiocp
--- /dev/null
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ int rc;
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s file\n", argv[0]);
+ return 1;
+ }
+
+ fd = open(argv[1], O_RDWR, 0);
+ if (fd <= 0) {
+ fprintf(stderr,
+ "open failed on %s, error: %s\n",
+ argv[1], strerror(errno));
+ return errno;
+ }
+
+ rc = posix_fadvise(fd, 0, 1024 * 1024, POSIX_FADV_DONTNEED);
+ if (rc) {
+ fprintf(stderr,
+ "fadvise FADV_DONTNEED failed on %s, error: %s\n",
+ argv[1], strerror(errno));
+ return errno;
+ }
+ return 0;
+}
}
run_test 94 "signal vs CP callback race"
-test_95() {
+test_95a() {
local file=$DIR/$tfile
local file2=$DIR2/$tfile
local fast_read_save
kill -USR1 $pid && wait $pid || error "wait for PID $pid failed"
}
-run_test 95 "Check readpage() on a page that was removed from page cache"
+run_test 95a "Check readpage() on a page that was removed from page cache"
+
+test_95b() {
+ local file=$DIR/$tfile
+ local file2=$DIR2/$tfile
+ local fast_read_save
+ local pid
+
+ fast_read_save=$($LCTL get_param -n llite.*.fast_read | head -n 1)
+ [ -z "$fast_read_save" ] && skip "no fast read support"
+
+ stack_trap "$LCTL set_param llite.*.fast_read=$fast_read_save" EXIT
+ $LCTL set_param llite.*.fast_read=0
+
+ $LFS setstripe -c $OSTCOUNT $file || error "failed to setstripe $file"
+ dd if=/dev/zero of=$file bs=$((PAGE_SIZE * 3)) count=1 ||
+ error "failed to write $file"
+
+ # This does the read from the second mount, so this flushes the pages
+ # the first mount and creates new ones on the second mount
+ # OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424
+ $LCTL set_param fail_loc=0x80001424 fail_val=5
+ $MULTIOP $file2 or${PAGE_SIZE}c &
+ pid=$!
+
+ sleep 2
+ fadvise_dontneed_helper $file2
+ $LCTL set_param fail_loc=0
+ sleep 4
+ wait $pid || error "failed to read file"
+}
+run_test 95b "Check readpage() on a page that is no longer uptodate"
# Data-on-MDT tests
test_100a() {