From f5564c35ede12659acedd14845cb36e70563233a Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Wed, 23 Aug 2023 17:19:38 +0300 Subject: [PATCH] LU-16935 llite: avoid hopeless i/o repeats On SLES12SP5 kernels (4.12.14_122.147, 4.12.14-122.162) a race between ll_filemap_fault and ll_imp_inval may lead to the livelock: - ll_filemap_fault loops endlessly as filemap_fault()->readpage() returns VM_FAULT_SIGBUS (it is unable to send read rpc as import is invalid) and as ll_page_inv_lock gets incremented within cl_page_discard()->..->vvp_page_delete() called after readpage failure. - ll_imp_inval stucks in obd_import_event(IMP_EVENT_INVALIDATE)->..->osc_object_invalidate (before recovery) waiting for completion of i/o ll_filemap_fault can not complete. @ll_page_inv_lock is used to check the page being read by kernel after it has been deleted from Lustre, which avoids potential stale data reads. This seqlock allows us to see that a page was potentially deleted, catch it in this case and repeat the I/O in ll_filemap_fault() or vvp_io_read_start(). To avoid endless I/O repeat wrongly, in this patch we only increse @ll_page_inv_lock for the page in PageUptodate state when delete the page in vvp_page_delete(). The page that not in PageUptodate state is usually deleted due to the error that does not require retry. By this way, ll_filemap_fault() and vvp_io_read_start() will not loop endless for those errors that does not need to repeat I/O as the seqlock @ll_page_inv_lock does not have any change. Test to illustrate the issus is added. sanity.sh tests are to test i/o error handling. cl_io_loop(): avoid restart if ci_tried_all_mirrors flag is set. HPE-bug-id: LUS-11686 Signed-off-by: Vladimir Saveliev Signed-off-by: Qian Yingjin Change-Id: I3b62bc95db01bf11f6098011bf29e4064c7e201e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51505 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Andrew Perepechko Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/llite/vvp_io.c | 2 ++ lustre/llite/vvp_page.c | 12 +++++++----- lustre/obdclass/cl_io.c | 8 ++++++-- lustre/tests/recovery-small.sh | 17 +++++++++++++++++ lustre/tests/sanity.sh | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 7 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 028f14b..4d88f21 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -641,6 +641,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_LOV_INVALID_OSTIDX 0x1428 #define OBD_FAIL_LLITE_DELAY_TRUNCATE 0x1430 #define OBD_FAIL_LLITE_READ_PAUSE 0x1431 +#define OBD_FAIL_LLITE_FAULT_PAUSE 0x1432 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 62e9903..0fa77d0 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1510,6 +1510,8 @@ static int vvp_io_fault_start(const struct lu_env *env, if (result != 0) RETURN(result); + CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_FAULT_PAUSE, cfs_fail_val); + /* must return locked page */ if (fio->ft_mkwrite) { LASSERT(cfio->ft_vmpage != NULL); diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index 07d3906..a900bbb 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -85,11 +85,13 @@ static void vvp_page_delete(const struct lu_env *env, * kernel after it has been deleted from Lustre, which avoids * potential stale data reads. The seqlock allows us to see * that a page was potentially deleted and catch the resulting - * SIGBUS - see ll_filemap_fault() (LU-16160) */ - write_seqlock(&ll_i2info(inode)->lli_page_inv_lock); - ClearPageUptodate(vmpage); - write_sequnlock(&ll_i2info(inode)->lli_page_inv_lock); - + * SIGBUS - see ll_filemap_fault() (LU-16160) + */ + if (PageUptodate(vmpage)) { + write_seqlock(&ll_i2info(inode)->lli_page_inv_lock); + ClearPageUptodate(vmpage); + write_sequnlock(&ll_i2info(inode)->lli_page_inv_lock); + } /* * The reference from vmpage to cl_page is removed, * but the reference back is still here. It is removed diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index ef674ae..b686e6e 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -783,8 +783,12 @@ int cl_io_loop(const struct lu_env *env, struct cl_io *io) result = rc; if (result == -EAGAIN && io->ci_ndelay && !io->ci_iocb_nowait) { - io->ci_need_restart = 1; - result = 0; + if (!io->ci_tried_all_mirrors) { + io->ci_need_restart = 1; + result = 0; + } else { + result = -EIO; + } } if (result == 0) diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 736ddd5..72ebcc1 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -3603,6 +3603,23 @@ test_156() } run_test 156 "tot_granted miscount after client eviction" +test_157() +{ + $LFS setstripe -i 0 -c 1 $DIR/$tfile || error "setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 + cancel_lru_locks osc + +#define OBD_FAIL_LLITE_FAULT_PAUSE 0x1432 + $LCTL set_param fail_loc=0x80001432 fail_val=3 + $MULTIOP $DIR/$tfile soO_RDWR:MRUc & + MULTIPID=$! + sleep 1 + ost_evict_client + wait $MULTIPID + [[ $? == 135 ]] || error "multiop failed with not SIGBUS" +} +run_test 157 "eviction during mmaped i/o" + complete_test $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 9a886fe..291e38f 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -28003,6 +28003,38 @@ test_398q() } run_test 398q "race dio with buffered i/o" +test_398r() { + $LFS setstripe -i 0 -c 1 $DIR/$tfile || error "setstripe failed" + echo "hello, world" > $DIR/$tfile + + cancel_lru_locks osc + +#define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost1 $LCTL set_param fail_loc=0x20f + cat $DIR/$tfile > /dev/null && error "cat should fail" + return 0 +} +run_test 398r "i/o error on file read" + +test_398s() { + [[ $OSTCOUNT -ge 2 && "$ost1_HOST" = "$ost2_HOST" ]] || + skip "remote OST" + + $LFS mirror create -N -i 0 -c 1 -N -i 1 -c 1 $DIR/$tfile || + error "mirror create failed" + + echo "hello, world" > $DIR/$tfile + $LFS mirror resync $DIR/$tfile || error "mirror resync failed" + + cancel_lru_locks osc + +#define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost1 $LCTL set_param fail_loc=0x20f + cat $DIR/$tfile > /dev/null && error "cat should fail" + return 0 +} +run_test 398s "i/o error on mirror file read" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then -- 1.8.3.1