Whamcloud - gitweb
LU-12752 mdt: commitrw_write() - check dying object under lock 97/41797/5
authorVladimir Saveliev <c17830@cray.com>
Mon, 1 Mar 2021 08:52:51 +0000 (11:52 +0300)
committerOleg Drokin <green@whamcloud.com>
Tue, 16 Mar 2021 18:16:20 +0000 (18:16 +0000)
If process writes to unlinked file the following race between
mdt_commitrw_write() and mdd_close() may occur because
mdt_commitrw_write() checks whether an object is dying without lock:

mdt_commitrw_write() checks lu_object_is_dying(&mo->mot_header) and it
not yet

mdd_close() interposes and destroys the object via
  mdo_destroy()
    lod_destroy()
      lod_sub_destroy()
        osd_destroy()
          obj->oo_destroyed = 1;

mdt_commitrw_write() continues, locks the object and returns ENOENT
from

  dt_attr_get()
    osd_attr_get()
      if (unlikely(obj->oo_destroyed))
        return -ENOENT;

If the file is built of DoM and raid component ll_delete_inode() calls
cl_sync_file_range() which is to iterate over both mdt and raid
components via mdc_io_fsync_start() and osc_io_fsync_start().  As
mdc_io_fsync_start() fails with -ENOENT due to failed write rpc,
osc_io_fsync_start() does not get called. Then
truncate_inode_pages_final() finds not-discarded pages and fails with:

  (osc_page.c:183:osc_page_delete()) Trying to teardown failed: -16
  (osc_page.c:184:osc_page_delete()) ASSERTION( 0 ) failed:
  (osc_page.c:184:osc_page_delete()) LBUG

Test to illustrate the issue is added.

The fix is to call lu_object_is_dying() under object lock.

Change-Id: I463c8a6f85d4f5fd934b167c6194f50ae9d4b7d4
HPE-bug-id: LUS-7189
Signed-off-by: Vladimir Saveliev <c17830@cray.com>
Reviewed-on: https://review.whamcloud.com/41797
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdt/mdt_io.c
lustre/tests/sanity.sh

index 64c4979..1b6bba4 100644 (file)
@@ -244,6 +244,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_STATFS_SPOOF       0x168
 #define OBD_FAIL_MDS_REINT_OPEN                 0x169
 #define OBD_FAIL_MDS_REINT_OPEN2        0x16a
 #define OBD_FAIL_MDS_STATFS_SPOOF       0x168
 #define OBD_FAIL_MDS_REINT_OPEN                 0x169
 #define OBD_FAIL_MDS_REINT_OPEN2        0x16a
+#define OBD_FAIL_MDS_COMMITRW_DELAY     0x16b
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index 5924aa1..7e564d1 100644 (file)
@@ -611,12 +611,6 @@ static int mdt_commitrw_write(const struct lu_env *env, struct obd_export *exp,
 retry:
        if (!dt_object_exists(dob))
                GOTO(out, rc = -ENOENT);
 retry:
        if (!dt_object_exists(dob))
                GOTO(out, rc = -ENOENT);
-       if (lu_object_is_dying(&mo->mot_header)) {
-               /* Commit to stale object can be just skipped silently. */
-               CDEBUG(D_INODE, "skip commit to stale object "DFID"\n",
-                       PFID(mdt_object_fid(mo)));
-               GOTO(out, rc = 0);
-       }
 
        if (niocount == 0) {
                rc = -EPROTO;
 
        if (niocount == 0) {
                rc = -EPROTO;
@@ -626,6 +620,8 @@ retry:
                GOTO(out, rc);
        }
 
                GOTO(out, rc);
        }
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_COMMITRW_DELAY, cfs_fail_val);
+
        th = dt_trans_create(env, dt);
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
        th = dt_trans_create(env, dt);
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
@@ -657,6 +653,12 @@ retry:
                GOTO(out_stop, rc);
 
        dt_write_lock(env, dob, 0);
                GOTO(out_stop, rc);
 
        dt_write_lock(env, dob, 0);
+       if (lu_object_is_dying(&mo->mot_header)) {
+               /* Commit to stale object can be just skipped silently. */
+               CDEBUG(D_INODE, "skip commit to stale object "DFID"\n",
+                       PFID(mdt_object_fid(mo)));
+               GOTO(unlock, rc = 0);
+       }
        rc = dt_write_commit(env, dob, lnb, niocount, th, oa->o_size);
        if (rc) {
                restart = th->th_restart_tran;
        rc = dt_write_commit(env, dob, lnb, niocount, th, oa->o_size);
        if (rc) {
                restart = th->th_restart_tran;
index ea2a9ea..865cac4 100755 (executable)
@@ -21350,6 +21350,17 @@ test_273a() {
 }
 run_test 273a "DoM: layout swapping should fail with DOM"
 
 }
 run_test 273a "DoM: layout swapping should fail with DOM"
 
+test_273b() {
+       mkdir -p $DIR/$tdir
+       $LFS setstripe -E 1M -L mdt -E -1 -c -1 $DIR/$tdir
+
+#define OBD_FAIL_MDS_COMMITRW_DELAY      0x16b
+       do_facet mds1 $LCTL set_param fail_loc=0x8000016b fail_val=2
+
+       $MULTIOP $DIR/$tdir/$tfile Ouw2097152c
+}
+run_test 273b "DoM: race writeback and object destroy"
+
 test_275() {
        remote_ost_nodsh && skip "remote OST with nodsh"
        [ $OST1_VERSION -lt $(version_code 2.10.57) ] &&
 test_275() {
        remote_ost_nodsh && skip "remote OST with nodsh"
        [ $OST1_VERSION -lt $(version_code 2.10.57) ] &&