Whamcloud - gitweb
LU-8672 tests: Fix error handling in replay-single test_89 74/22974/7
author Abrarahmed Momin <abrar.habib@seagate.com>
Thu, 22 Feb 2018 16:50:16 +0000 (19:50 +0300)
committer Oleg Drokin <oleg.drokin@intel.com>
Sat, 3 Mar 2018 04:29:03 +0000 (04:29 +0000)
Update replay-single test_89() to error out on wait_mds_ost_sync and
wait_delete_completed timeout.

Correct error handling in wait_delete_completed_mds and
wait_delete_completed.

Signed-off-by: Abrarahmed Momin <abrar.habib@seagate.com>
Signed-off-by: Elena Gryaznova <c17455@cray.com>
Signed-off-by: Ashish Purkar <ashish.purkar@seagate.com>
Signed-off-by: James Nunez <james.a.nunez@intel.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@seagate.com>
Cray-bug-id: MRP-1680
Test-Parameters: trivial
Change-Id: I54e30221361e73a17ba857cb19b1efcc019b412f
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-by: Rahul Deshmukh <rahul.deshmukh@seagate.com>
Reviewed-by: Ujjwal Lanjewar <ujjwal.lanjewar@seagate.com>
Reviewed-by: Elena Gryaznova <c17455@cray.com>
Reviewed-on: https://review.whamcloud.com/22974
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/replay-single.sh
lustre/tests/test-framework.sh

index f159f93..3b0f75e 100755 (executable)
@@ -29,8 +29,8 @@ ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT "
 [ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
 
 [ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
-# bug number for skipped test:         LU-5761
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT   89"
+# bug number for skipped test:
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT "
 
 build_test_filter
 
@@ -3297,8 +3297,8 @@ test_89() {
        cancel_lru_locks osc
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        rm -f $DIR/$tdir/$tfile
-       wait_mds_ost_sync
-       wait_delete_completed
+       wait_mds_ost_sync || error "initial MDS-OST sync timed out"
+       wait_delete_completed || error "initial wait delete timed out"
        BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
        $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
        dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
@@ -3310,8 +3310,8 @@ test_89() {
        mount_facet ost1
        zconf_mount $(hostname) $MOUNT || error "mount fails"
        client_up || error "client_up failed"
-       wait_mds_ost_sync
-       wait_delete_completed
+       wait_mds_ost_sync || error "MDS-OST sync timed out"
+       wait_delete_completed || error "wait delete timed out"
        BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
        [ $((BLOCKS2 - BLOCKS1)) -le 4  ] ||
                error $((BLOCKS2 - BLOCKS1)) blocks leaked
index 1f53fc9..0a78649 100755 (executable)
@@ -2526,18 +2526,18 @@ sync_all_data() {
 }
 
 wait_zfs_commit() {
+       local zfs_wait=${2:-5}
+
        # the occupied disk space will be released
-       # only after DMUs are committed
+       # only after TXGs are committed
        if [[ $(facet_fstype $1) == zfs ]]; then
-               echo "sleep $2 for ZFS OSD"
-               sleep $2
+               echo "sleep $zfs_wait for ZFS $(facet_fstype $1)"
+               sleep $zfs_wait
        fi
 }
 
 wait_delete_completed_mds() {
-       local MAX_WAIT=${1:-20}
-       # for ZFS, waiting more time for DMUs to be committed
-       local ZFS_WAIT=${2:-5}
+       local max_wait=${1:-20}
        local mds2sync=""
        local stime=$(date +%s)
        local etime
@@ -2554,8 +2554,8 @@ wait_delete_completed_mds() {
                mds2sync="$mds2sync $node"
        done
        if [ -z "$mds2sync" ]; then
-               wait_zfs_commit $SINGLEMDS $ZFS_WAIT
-               return
+               wait_zfs_commit $SINGLEMDS
+               return 0
        fi
        mds2sync=$(comma_list $mds2sync)
 
@@ -2567,21 +2567,26 @@ wait_delete_completed_mds() {
        # do this upon commit
 
        local WAIT=0
-       while [[ $WAIT -ne $MAX_WAIT ]]; do
+       while [[ $WAIT -ne $max_wait ]]; do
                changes=$(do_nodes $mds2sync \
                        "$LCTL get_param -n osc.*MDT*.sync_*" | calc_sum)
                #echo "$node: $changes changes on all"
                if [[ $changes -eq 0 ]]; then
-                       wait_zfs_commit $SINGLEMDS $ZFS_WAIT
-                       return
+                       wait_zfs_commit $SINGLEMDS
+
+                       # the occupied disk space will be released
+                       # only after TXGs are committed
+                       wait_zfs_commit ost1
+                       return 0
                fi
                sleep 1
-               WAIT=$(( WAIT + 1))
+               WAIT=$((WAIT + 1))
        done
 
        etime=$(date +%s)
        echo "Delete is not completed in $((etime - stime)) seconds"
        do_nodes $mds2sync "$LCTL get_param osc.*MDT*.sync_*"
+       return 1
 }
 
 wait_for_host() {
@@ -2689,6 +2694,7 @@ wait_mds_ost_sync () {
        done
 
        # show which nodes are not finished.
+       cmd=$(echo $cmd | sed 's/-n//')
        do_nodes $list "$cmd"
        echo "$facet recovery node $i not done in $WAIT_TIMEOUT sec. $STATUS"
        return 1
@@ -2736,7 +2742,7 @@ wait_destroy_complete () {
 
 wait_delete_completed() {
        wait_delete_completed_mds $1 || return $?
-       wait_destroy_complete
+       wait_destroy_complete || return $?
 }
 
 wait_exit_ST () {