From: Bobi Jam Date: Fri, 3 Mar 2023 03:50:42 +0000 (+0800) Subject: LU-16263 lov: continue fsync on other OST objs even on -ENOENT X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=9f06db4bf4fc11ed8e77bf670f13a574ec5783a2;p=fs%2Flustre-release.git LU-16263 lov: continue fsync on other OST objs even on -ENOENT When fsync races with truncate, we should continue with the fsync of the other OST objects even if some stripe's fsync returns -ENOENT, so that the client can still discard cached pages by calling osc_io_fsync_start()->osc_cache_writeback_range(). Lustre-change: https://review.whamcloud.com/50005 Lustre-commit: 927b5cd49c3369d533d7f8dc5c8df497aaf33b6e Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120 Signed-off-by: Bobi Jam Change-Id: I457ba80063086e310df55aaa22778b51a6ea211e Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50195 Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 47a766b..ae0c678 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -289,6 +289,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSD_SCRUB_STALE 0x19f #define 
OBD_FAIL_OFD_SET_OID 0x1e0 +#define OBD_FAIL_OFD_COMMITRW_DELAY 0x1e1 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 9adcb40..d40b06c 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -1047,8 +1047,16 @@ static int lov_io_call(const struct lu_env *env, struct lov_io *lio, ENTRY; list_for_each_entry(sub, &lio->lis_active, sub_linkage) { rc = iofunc(sub->sub_env, &sub->sub_io); - if (rc) + if (rc) { + /** + * fsync race with truncate, we'd continue to other + * OST object's fsync to potentially discard + * caching pages (osc_cache_writeback_range). + */ + if (rc == -ENOENT && parent->ci_type == CIT_FSYNC) + continue; break; + } if (parent->ci_result == 0) parent->ci_result = sub->sub_io.ci_result; diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index caf9870..ef27d46 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -1274,6 +1274,8 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp, } retry: + CFS_FAIL_TIMEOUT(OBD_FAIL_OFD_COMMITRW_DELAY, cfs_fail_val); + th = ofd_trans_create(env, ofd); if (IS_ERR(th)) GOTO(out, rc = PTR_ERR(th)); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 05a9a30..e878aeb 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -23111,6 +23111,17 @@ test_273b() { } run_test 273b "DoM: race writeback and object destroy" +test_273c() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -E-1 -c-1 $DIR/$tdir + + #define OBD_FAIL_OFD_COMMITRW_DELAY 0x1e1 + do_facet ost1 $LCTL set_param fail_loc=0x800001e1 fail_val=2 + + $MULTIOP $DIR/$tdir/$tfile Ouw2097152c +} +run_test 273c "race writeback and object destroy" + test_275() { remote_ost_nodsh && skip "remote OST with nodsh" [ $OST1_VERSION -lt $(version_code 2.10.57) ] &&