From 7bb1e211d217d5a82ac2d5e4edad5ae018090761 Mon Sep 17 00:00:00 2001 From: Andrew Perepechko Date: Tue, 26 Dec 2023 20:02:12 +0300 Subject: [PATCH] LU-16637 llite: tolerate fresh page cache pages after truncate Truncate called by ll_layout_refresh() can race with a fast read or tiny write, which can add an uninitialized non-uptodate page into the page cache. We want to avoid expensive locking for this rare case so if there is any leftover in the cache after truncate, just check that the pages are not uptodate, not dirty and do not have any filesystem-specific information attached to them. Change-Id: I8cadc022a3d1822a585f32e1a765e59ad0ff434d Signed-off-by: Andrew Perepechko HPE-bug-id: LUS-11937 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53554 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Zhenyu Xu Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/llite/llite_lib.c | 40 ++++++++++++++++++++++++++++++++++------ lustre/llite/rw26.c | 3 ++- lustre/tests/sanity.sh | 17 +++++++++++++++++ lustre/utils/lfs.c | 13 +++++++++---- 5 files changed, 63 insertions(+), 11 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 465382d..fba127e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -634,6 +634,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_LOV_COMP_MAGIC 0x1426 #define OBD_FAIL_LOV_COMP_PATTERN 0x1427 #define OBD_FAIL_LOV_INVALID_OSTIDX 0x1428 +#define OBD_FAIL_LLITE_DELAY_TRUNCATE 0x1430 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index c190b8e..7580a4c 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -3046,6 +3046,8 @@ void ll_truncate_inode_pages_final(struct inode *inode) truncate_inode_pages_final(mapping); + CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_DELAY_TRUNCATE, 5); + /* Workaround for LU-118: Note nrpages may 
not be totally updated when * truncate_inode_pages() returns, as there can be a page in the process * of deletion (inside __delete_from_page_cache()) in the specified @@ -3060,12 +3062,38 @@ void ll_truncate_inode_pages_final(struct inode *inode) ll_xa_unlock_irqrestore(&mapping->i_pages, flags); } /* Workaround end */ - LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu " - "state %#lx, lli_flags %#lx, " - "see https://jira.whamcloud.com/browse/LU-118\n", - ll_i2sbi(inode)->ll_fsname, - PFID(ll_inode2fid(inode)), inode, nrpages, - inode->i_state, ll_i2info(inode)->lli_flags); + if (nrpages) { +#ifdef HAVE_XARRAY_SUPPORT + XA_STATE(xas, &mapping->i_pages, 0); + struct page *page; +#endif + CWARN("%s: inode="DFID"(%p) nrpages=%lu " + "state %#lx, lli_flags %#lx, " + "see https://jira.whamcloud.com/browse/LU-118\n", + ll_i2sbi(inode)->ll_fsname, + PFID(ll_inode2fid(inode)), inode, nrpages, + inode->i_state, ll_i2info(inode)->lli_flags); +#ifdef HAVE_XARRAY_SUPPORT + rcu_read_lock(); + xas_for_each(&xas, page, ULONG_MAX) { + if (xas_retry(&xas, page)) + continue; + + if (xa_is_value(page)) + continue; + + /* + * We can only have non-uptodate pages + * without internal state at this point + */ + LASSERTF(!PageUptodate(page) && + !PageDirty(page) && + !PagePrivate(page), + "%p", page); + } + rcu_read_unlock(); +#endif + } } int ll_read_inode2(struct inode *inode, void *opaque) diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 298b8f1..5586468 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -820,7 +820,8 @@ static int ll_write_begin(struct file *file, struct address_space *mapping, lcc = ll_cl_find(inode); if (lcc == NULL) { - vmpage = grab_cache_page_nowait(mapping, index); + /* do not allocate a page, only find & lock */ + vmpage = find_lock_page(mapping, index); result = ll_tiny_write_begin(vmpage, mapping); GOTO(out, result); } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index be98d8a..71deb1d 100755 --- 
a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -29950,6 +29950,23 @@ test_440() { } run_test 440 "bash completion for lfs, lctl" +test_442() { + local pid1 + local pid2 + mkdir -p $DIR/$tdir + multiop $DIR/$tdir/$tfile.1 O_w1 & pid1=$! + multiop $DIR/$tdir/$tfile.1 O_w1 & pid2=$! + sleep 1 + touch $DIR/$tdir/$tfile.2 + $LFS swap_layouts -n $DIR/$tdir/$tfile.1 $DIR/$tdir/$tfile.2 + $LCTL set_param fail_loc=0x1430 + kill -USR1 $pid1 + sleep 1 + kill -USR1 $pid2 + wait +} +run_test 442 "truncate vs read/write should not panic" + prep_801() { [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] || [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] && diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index c0dbbc9..4566994 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -10742,12 +10742,17 @@ static int lfs_hsm_cancel(int argc, char **argv) static int lfs_swap_layouts(int argc, char **argv) { - if (argc != 3) + int noxtime = 0; + + if (argc == 4 && !strcmp(argv[1], "-n")) + noxtime = 1; + else if (argc != 3) return CMD_HELP; - return llapi_swap_layouts(argv[1], argv[2], 0, 0, - SWAP_LAYOUTS_KEEP_MTIME | - SWAP_LAYOUTS_KEEP_ATIME); + return llapi_swap_layouts(argv[1+noxtime], argv[2+noxtime], + 0, 0, noxtime ? 0 : + (SWAP_LAYOUTS_KEEP_MTIME | + SWAP_LAYOUTS_KEEP_ATIME)); } static const char *const ladvise_names[] = LU_LADVISE_NAMES; -- 1.8.3.1