Whamcloud - gitweb
LU-16263 lov: continue fsync on other OST objs even on -ENOENT
authorBobi Jam <bobijam@whamcloud.com>
Fri, 3 Mar 2023 03:50:42 +0000 (11:50 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 25 Apr 2023 03:36:21 +0000 (03:36 +0000)
When fsync races with truncate, continue with the fsync of the other
OST objects even if some stripe's fsync returns -ENOENT, so that the
client can still potentially discard cached pages by calling
osc_io_fsync_start()->osc_cache_writeback_range().

Lustre-change: https://review.whamcloud.com/50005
Lustre-commit: 927b5cd49c3369d533d7f8dc5c8df497aaf33b6e

Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Test-Parameters: testlist=sanity ostcount=4 env=ONLY="273b 273c",ONLY_REPEAT=120
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: I457ba80063086e310df55aaa22778b51a6ea211e
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50195
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/obd_support.h
lustre/lov/lov_io.c
lustre/ofd/ofd_io.c
lustre/tests/sanity.sh

index 47a766b..ae0c678 100644 (file)
@@ -289,6 +289,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSD_SCRUB_STALE                       0x19f
 
 #define OBD_FAIL_OFD_SET_OID                           0x1e0
+#define OBD_FAIL_OFD_COMMITRW_DELAY                    0x1e1
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
index 9adcb40..d40b06c 100644 (file)
@@ -1047,8 +1047,16 @@ static int lov_io_call(const struct lu_env *env, struct lov_io *lio,
        ENTRY;
        list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
                rc = iofunc(sub->sub_env, &sub->sub_io);
-               if (rc)
+               if (rc) {
+                       /**
+                        * fsync race with truncate, we'd continue to other
+                        * OST object's fsync to potentially discard
+                        * caching pages (osc_cache_writeback_range).
+                        */
+                       if (rc == -ENOENT && parent->ci_type == CIT_FSYNC)
+                               continue;
                        break;
+               }
 
                if (parent->ci_result == 0)
                        parent->ci_result = sub->sub_io.ci_result;
index caf9870..ef27d46 100644 (file)
@@ -1274,6 +1274,8 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
        }
 
 retry:
+       CFS_FAIL_TIMEOUT(OBD_FAIL_OFD_COMMITRW_DELAY, cfs_fail_val);
+
        th = ofd_trans_create(env, ofd);
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
index 05a9a30..e878aeb 100755 (executable)
@@ -23111,6 +23111,17 @@ test_273b() {
 }
 run_test 273b "DoM: race writeback and object destroy"
 
+test_273c() {
+       mkdir -p $DIR/$tdir
+       $LFS setstripe -E 1M -E-1 -c-1 $DIR/$tdir
+
+       #define OBD_FAIL_OFD_COMMITRW_DELAY      0x1e1
+       do_facet ost1 $LCTL set_param fail_loc=0x800001e1 fail_val=2
+
+       $MULTIOP $DIR/$tdir/$tfile Ouw2097152c
+}
+run_test 273c "race writeback and object destroy"
+
 test_275() {
        remote_ost_nodsh && skip "remote OST with nodsh"
        [ $OST1_VERSION -lt $(version_code 2.10.57) ] &&