From 927b5cd49c3369d533d7f8dc5c8df497aaf33b6e Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Wed, 15 Feb 2023 17:09:53 +0800 Subject: [PATCH] LU-16263 lov: continue fsync on other OST objs even on -ENOENT When fsync races with truncate, we should continue on to the other OST objects' fsync even if some stripe's fsync returns -ENOENT, so that the client can still potentially discard cached pages by calling osc_io_fsync_start()->osc_cache_writeback_range(). Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Signed-off-by: Bobi Jam Change-Id: I457ba80063086e310df55aaa22778b51a6ea211e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50005 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin Reviewed-by: Alex Zhuravlev --- lustre/include/obd_support.h | 1 + lustre/lov/lov_io.c | 10 +++++++++- lustre/ofd/ofd_io.c | 2 ++ lustre/tests/sanity.sh | 11 +++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 923a1f5..2f5d507 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -293,6 +293,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSD_SCRUB_STALE 0x19f #define OBD_FAIL_OFD_SET_OID 
0x1e0 +#define OBD_FAIL_OFD_COMMITRW_DELAY 0x1e1 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index cc95993..99d53be 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -1025,8 +1025,16 @@ static int lov_io_call(const struct lu_env *env, struct lov_io *lio, ENTRY; list_for_each_entry(sub, &lio->lis_active, sub_linkage) { rc = iofunc(sub->sub_env, &sub->sub_io); - if (rc) + if (rc) { + /** + * fsync race with truncate, we'd continue to other + * OST object's fsync to potentially discard + * caching pages (osc_cache_writeback_range). + */ + if (rc == -ENOENT && parent->ci_type == CIT_FSYNC) + continue; break; + } if (parent->ci_result == 0) parent->ci_result = sub->sub_io.ci_result; diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 8c1b798..8956690 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -1296,6 +1296,8 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp, } retry: + CFS_FAIL_TIMEOUT(OBD_FAIL_OFD_COMMITRW_DELAY, cfs_fail_val); + th = ofd_trans_create(env, ofd); if (IS_ERR(th)) GOTO(out, rc = PTR_ERR(th)); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 317a78e..d23cd11 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -23885,6 +23885,17 @@ test_273b() { } run_test 273b "DoM: race writeback and object destroy" +test_273c() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -E-1 -c-1 $DIR/$tdir + + #define OBD_FAIL_OFD_COMMITRW_DELAY 0x1e1 + do_facet ost1 $LCTL set_param fail_loc=0x800001e1 fail_val=2 + + $MULTIOP $DIR/$tdir/$tfile Ouw2097152c +} +run_test 273c "race writeback and object destroy" + test_275() { remote_ost_nodsh && skip "remote OST with nodsh" [ $OST1_VERSION -lt $(version_code 2.10.57) ] && -- 1.8.3.1