Whamcloud - gitweb
LU-10695 tests: fix sanity-lfsck test_23c 07/33407/7
authorAndreas Dilger <adilger@whamcloud.com>
Fri, 19 Oct 2018 23:13:05 +0000 (17:13 -0600)
committerOleg Drokin <green@whamcloud.com>
Tue, 6 Nov 2018 06:41:41 +0000 (06:41 +0000)
sanity-lfsck test_23c fails intermittently with "(8) unexpected size"
while introducing the required corruption to the filesystem to run
the test.

It appears that in these cases, LFSCK has actually fixed the file
before it can be "further corrupted" as part of the test.  In
this case, the test failure is actually a sign that LFSCK is working
correctly, so it should not be considered a test failure.

Add a check that the file was repaired correctly (contains original
data) and consider the test a pass.

Test-Parameters: trivial testlist=sanity-lfsck,sanity-lfsck,sanity-lfsck mdtfilesystemtype=zfs ostfilesystemtype=zfs
Test-Parameters: testlist=sanity-lfsck,sanity-lfsck,sanity-lfsck mdtfilesystemtype=zfs ostfilesystemtype=zfs
Test-Parameters: testlist=sanity-lfsck,sanity-lfsck,sanity-lfsck mdtfilesystemtype=zfs ostfilesystemtype=zfs
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I9d92cd8e471426cc544293e1149ad556a33ebbe5
Reviewed-on: https://review.whamcloud.com/33407
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/sanity-lfsck.sh

index 468478c..7619bb0 100644 (file)
@@ -3889,6 +3889,18 @@ test_23b() {
 }
 run_test 23b "LFSCK can repair dangling name entry (2)"
 
+cleanup_23c() {
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(10) unexpected status"
+       }
+
+       stop_full_debug_logging
+}
+
 test_23c() {
        echo "#####"
        echo "The objectA has multiple hard links, one of them corresponding"
@@ -3908,34 +3920,34 @@ test_23c() {
        check_mount_and_prep
 
        $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
-       $LFS path2fid $DIR/$tdir/d0
+       parent_fid="$($LFS path2fid $DIR/$tdir/d0)"
+       echo "parent_fid=$parent_fid"
 
        createmany -o $DIR/$tdir/d0/t 10 || error "(1.5) Fail to creatmany"
 
        echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
-       $LFS path2fid $DIR/$tdir/d0/f0
+       f0_fid="$($LFS path2fid $DIR/$tdir/d0/f0)"
+       echo "f0_fid=$f0_fid"
 
        echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
-       $LFS path2fid $DIR/$tdir/d0/f1
-
-       local SEQ0=$($LFS path2fid $DIR/$tdir/d0/f0 | awk -F':' '{print $1}')
-       local SEQ1=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $1}')
+       f1_fid="$($LFS path2fid $DIR/$tdir/d0/f1)"
+       echo "f1_fid=$f1_fid"
 
-       if [ "$SEQ0" != "$SEQ1" ]; then
+       if [ "${fid_f0/:.*/}" != "${fid_f1/:.*/}" ]; then
                # To guarantee that the f0 and f1 are in the same FID seq
                rm -f $DIR/$tdir/d0/f0 ||
                        error "(3.1) Fail to unlink $DIR/$tdir/d0/f0"
                echo "dummy" > $DIR/$tdir/d0/f0 ||
                        error "(3.2) Fail to touch on MDT0"
-               $LFS path2fid $DIR/$tdir/d0/f0
+               f0_fid="$($LFS path2fid $DIR/$tdir/d0/f0)"
+               echo "f0_fid=$f0_fid (replaced)"
        fi
 
-       local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}')
-       OID=$(printf %d $OID)
+       local oid=$(awk -F':' '{ printf $2 }' <<< $f1_fid)
 
        echo "Inject failure stub on MDT0 to simulate dangling name entry"
        #define OBD_FAIL_LFSCK_DANGLING3        0x1621
-       do_facet $SINGLEMDS $LCTL set_param fail_val=$OID fail_loc=0x1621
+       do_facet $SINGLEMDS $LCTL set_param fail_val=$oid fail_loc=0x1621
        ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
        do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
 
@@ -3960,8 +3972,17 @@ test_23c() {
        $START_NAMESPACE -r -C ||
                error "(7) Fail to start LFSCK for namespace"
 
-       wait_update_facet client "stat $DIR/$tdir/d0/foo |
-               awk '/Size/ { print \\\$2 }'" "0" $LTIME || {
+       wait_update_facet client "stat -c%s $DIR/$tdir/d0/foo" "0" $LTIME || {
+               # While unexpected by the test, it is valid for LFSCK to repair
+               # the link to the original object before any data is written.
+               local size=$(stat -c %s $DIR/$tdir/d0/foo)
+
+               if [ "$size" = "6" -a "$(<$DIR/$tdir/d0/foo)" = "dummy" ]; then
+                       log "LFSCK repaired file prematurely"
+                       cleanup_23c
+                       return 0
+               fi
+
                stat $DIR/$tdir/d0/foo
                $SHOW_NAMESPACE
                error "(8) unexpected size"
@@ -3970,15 +3991,7 @@ test_23c() {
        echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
        cancel_lru_locks osc
 
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
-       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
-               mdd.${MDT_DEV}.lfsck_namespace |
-               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
-               $SHOW_NAMESPACE
-               error "(10) unexpected status"
-       }
-
-       stop_full_debug_logging
+       cleanup_23c
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^dangling_repaired/ { print $2 }')