From: wang di Date: Mon, 16 Sep 2013 14:02:41 +0000 (-0700) Subject: LU-3626 tests: More time to allow mds-ost sync being finished X-Git-Tag: 2.5.51~75 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=28806162c234a178365853a393aa401e180256e2;ds=sidebyside LU-3626 tests: More time to allow mds-ost sync being finished Because mds-ost unlink log synchronization is not enforced by any timeout, we add more time to allow the mds-ost sync to finish in replay-single test_20b. Also add more information in the tests to help us understand which MDT slows down the synchronization. Signed-off-by: wang di Change-Id: I5f63540e602ad29cd221de6dac0c22546629a9cd Reviewed-on: http://review.whamcloud.com/7676 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu --- diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 5c695d8..3086976 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -424,27 +424,30 @@ test_20a() { # was test_20 run_test 20a "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)" test_20b() { # bug 10480 - BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` + local wait_timeout=$((TIMEOUT * 4)) + local BEFOREUSED + local AFTERUSED - dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 & - pid=$! - while [ ! -e $DIR/$tfile ] ; do - usleep 60 # give dd a chance to start - done - - $GETSTRIPE $DIR/$tfile || return 1 - rm -f $DIR/$tfile || return 2 # make it an orphan - mds_evict_client - client_up || client_up || true # reconnect + BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 & + pid=$! + while [ ! 
-e $DIR/$tfile ] ; do + usleep 60 # give dd a chance to start + done - fail $SINGLEMDS # start orphan recovery - wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" - wait_mds_ost_sync || return 3 - AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` - log "before $BEFOREUSED, after $AFTERUSED" - (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) && - error "after $AFTERUSED > before $BEFOREUSED" - return 0 + $GETSTRIPE $DIR/$tfile || return 1 + rm -f $DIR/$tfile || return 2 # make it an orphan + mds_evict_client + client_up || client_up || true # reconnect + + fail $SINGLEMDS # start orphan recovery + wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" + wait_delete_completed_mds $wait_timeout || return 3 + AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') + log "before $BEFOREUSED, after $AFTERUSED" + (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) && + error "after $AFTERUSED > before $BEFOREUSED" + return 0 } run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 190026d..d516313 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2068,6 +2068,7 @@ wait_mds_ost_sync () { echo "Waiting for orphan cleanup..." # MAX value includes time needed for MDS-OST reconnection local MAX=$(( TIMEOUT * 2 )) + local WAIT_TIMEOUT=${1:-$MAX} local WAIT=0 local new_wait=true local list=$(comma_list $(mdts_nodes)) @@ -2080,7 +2081,9 @@ wait_mds_ost_sync () { list=$(comma_list $(osts_nodes)) cmd="$LCTL get_param -n obdfilter.*.mds_sync" fi - while [ $WAIT -lt $MAX ]; do + + echo "wait $WAIT_TIMEOUT secs maximumly for $list mds-ost sync done." 
+ while [ $WAIT -lt $WAIT_TIMEOUT ]; do local -a sync=($(do_nodes $list "$cmd")) local con=1 local i @@ -2096,10 +2099,13 @@ wait_mds_ost_sync () { done sleep 2 # increase waiting time and cover statfs cache [ ${con} -eq 1 ] && return 0 - echo "Waiting $WAIT secs for $facet mds-ost sync done." + echo "Waiting $WAIT secs for $list $i mds-ost sync done." WAIT=$((WAIT + 2)) done - echo "$facet recovery not done in $MAX sec. $STATUS" + + # show which nodes are not finished. + do_nodes $list "$cmd" + echo "$facet recovery node $i not done in $WAIT_TIMEOUT sec. $STATUS" return 1 }