Whamcloud - gitweb
LU-12848 tests: link succeeded to an orphan remote object 91/35991/14
author Alexander Zarochentsev <c17826@cray.com>
Mon, 12 Aug 2019 20:59:05 +0000 (23:59 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 25 Aug 2021 06:22:48 +0000 (06:22 +0000)
An open file gets unlinked by rename; at the same time,
a cross-MDT link is able to create a name
for the dying object. That causes file system corruption,
seen as a failed attempt to remove the test dir; e2fsck
would also see an unconnected inode.

Cray-bug-id: LUS-6208
Test-Parameters: mdtcount=2 envdefinitions=ONLY=111 testlist=sanityn
Change-Id: Ic1fde278e5f4b53eaf5560ab50fe460d8c7f7dc3
Signed-off-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-on: https://review.whamcloud.com/35991
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdt/mdt_reint.c
lustre/tests/sanityn.sh

index 78fe61b..e813f28 100644 (file)
@@ -263,6 +263,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_DQACQ_NET           0x187
 #define OBD_FAIL_MDS_STRIPE_CREATE      0x188
 #define OBD_FAIL_MDS_STRIPE_FID                 0x189
 #define OBD_FAIL_MDS_DQACQ_NET           0x187
 #define OBD_FAIL_MDS_STRIPE_CREATE      0x188
 #define OBD_FAIL_MDS_STRIPE_FID                 0x189
+#define OBD_FAIL_MDS_LINK_RENAME_RACE   0x18a
 
 /* OI scrub */
 #define OBD_FAIL_OSD_SCRUB_DELAY                       0x190
 
 /* OI scrub */
 #define OBD_FAIL_OSD_SCRUB_DELAY                       0x190
index 82a3bf1..44b9638 100644 (file)
@@ -1349,6 +1349,8 @@ static int mdt_reint_link(struct mdt_thread_info *info,
 
        cos_incompat = (mdt_object_remote(mp) || mdt_object_remote(ms));
 
 
        cos_incompat = (mdt_object_remote(mp) || mdt_object_remote(ms));
 
+       OBD_RACE(OBD_FAIL_MDS_LINK_RENAME_RACE);
+
        lhp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lhp, LCK_PW, &rr->rr_name);
        rc = mdt_reint_object_lock(info, mp, lhp, MDS_INODELOCK_UPDATE,
        lhp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lhp, LCK_PW, &rr->rr_name);
        rc = mdt_reint_object_lock(info, mp, lhp, MDS_INODELOCK_UPDATE,
@@ -2947,6 +2949,7 @@ out_put_srcdir:
                mdt_dom_discard_data(info, mnew);
                mdt_object_put(info->mti_env, mnew);
        }
                mdt_dom_discard_data(info, mnew);
                mdt_object_put(info->mti_env, mnew);
        }
+       OBD_RACE(OBD_FAIL_MDS_LINK_RENAME_RACE);
        return rc;
 }
 
        return rc;
 }
 
index 71af185..acecb8c 100755 (executable)
@@ -5643,6 +5643,40 @@ test_110() {
 }
 run_test 110 "do not grant another lock on resend"
 
 }
 run_test 110 "do not grant another lock on resend"
 
+test_111() {
+       [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs"
+       [[ $(facet_active_host mds1) = $(facet_active_host mds2) ]] ||
+               skip "MDT0 and MDT1 should be on the same node"
+
+       mkdir $DIR1/$tdir
+       $LFS mkdir -i 0 $DIR1/$tdir/mdt0dir
+       $LFS mkdir -i 1 $DIR1/$tdir/mdt1dir
+
+       mkdir $DIR1/$tdir/mdt0dir/foodir
+       touch $DIR1/$tdir/mdt0dir/foodir/{file1,file2}
+
+       $MULTIOP $DIR2/$tdir/mdt0dir/foodir/file2 Ow4096_c &
+       MULTIOP_PID=$!
+       ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2
+
+       #define OBD_FAIL_MDS_LINK_RENAME_RACE   0x18a
+       do_facet mds1 $LCTL set_param fail_loc=0x8000018a
+
+       ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2x &
+       sleep 1
+
+       rm $DIR2/$tdir/mdt1dir/file2
+       sleep 1
+
+       mv $DIR2/$tdir/mdt0dir/foodir/file1 $DIR2/$tdir/mdt0dir/foodir/file2
+       sleep 1
+
+       kill $MULTIOP_PID
+       wait
+       rm -r $DIR1/$tdir || error "Removing test dir failed"
+}
+run_test 111 "A racy rename/link an open file should not cause fs corruption"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script