From: Michael MacDonald
Date: Tue, 25 Feb 2014 00:56:36 +0000 (-0500)
Subject: LU-4643 hsm: Make sanity-hsm test_60 more robust
X-Git-Tag: 2.5.57~58
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=289d11769bde4d5b427a8d28f6a86b9492aed0b3

LU-4643 hsm: Make sanity-hsm test_60 more robust

The first version of this test was fragile and could fail
intermittently when test infrastructure was not capable of
providing 1MB/sec in lustre bandwidth.

This commit changes the test to validate that a progress update
occurs within the expected window, rather than testing for
a specific amount of data copied under ideal conditions.

Signed-off-by: Michael MacDonald
Change-Id: Ief40a27452bc09d94101c7df00a46aa171a572f8
Reviewed-on: http://review.whamcloud.com/9376
Reviewed-by: Jian Yu
Reviewed-by: James Nunez
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh
index 01dbc9b..efb42ea0 100755
--- a/lustre/tests/sanity-hsm.sh
+++ b/lustre/tests/sanity-hsm.sh
@@ -565,21 +565,6 @@ wait_request_state() {
 		error "request on $fid is not $state on $mds"
 }
 
-wait_request_progress() {
-	local fid=$1
-	local request=$2
-	local progress=$3
-	# 4th arg (mdt index) is optional
-	local mdtidx=${4:-0}
-	local mds=mds$((mdtidx + 1))
-
-	local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.active_requests"
-	cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$12}' | cut -f2 -d="
-
-	wait_result $mds "$cmd" $progress 100 ||
-		error "request on $fid has not made progress $progress on $mds"
-}
-
 get_request_state() {
 	local fid=$1
 	local request=$2
@@ -2554,6 +2539,9 @@ test_58() {
 run_test 58 "Truncate a released file will trigger restore"
 
 test_60() {
+	# This test validates the fix for LU-4512. Ensure that the -u
+	# option changes the progress reporting interval from the default
+	# (30 seconds) to the user-specified interval.
 	local interval=5
 	local progress_timeout=$((interval * 2))
 
@@ -2568,10 +2556,36 @@ test_60() {
 	local start_at=$(date +%s)
 	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
 		error "could not archive file"
-	wait_request_progress $fid ARCHIVE 5242880
+
+	local mdtidx=0
+	local mdt=${MDT_PREFIX}${mdtidx}
+	local mds=mds$((mdtidx + 1))
+
+	local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests"
+	cmd+=" | awk '/'$fid'.*action=ARCHIVE/ {print \\\$12}' | cut -f2 -d="
+
+	local RESULT
+	local WAIT=0
+	local sleep=1
+
+	echo -n "Expecting a progress update within $progress_timeout seconds... "
+	while [ true ]; do
+		RESULT=$(do_node $(facet_active_host $mds) "$cmd")
+		if [ $RESULT -gt 0 ]; then
+			echo "$RESULT bytes copied in $WAIT seconds."
+			break
+		elif [ $WAIT -ge $progress_timeout ]; then
+			error "Timed out waiting for progress update!"
+			break
+		fi
+		WAIT=$((WAIT + sleep))
+		sleep $sleep
+	done
+
 	local finish_at=$(date +%s)
 	local elapsed=$((finish_at - start_at))
 
+	# Ensure that the progress update occurred within the expected window.
 	if [ $elapsed -gt $progress_timeout ]; then
 		error "Expected progress update within $progress_timeout seconds"
 	elif [ $elapsed -lt $interval ]; then