Whamcloud - gitweb
LU-4086 tests: strengthen sanity-hsm/test_33 12/10712/2
author Bruno Faccini <bruno.faccini@intel.com>
Wed, 12 Mar 2014 13:12:33 +0000 (14:12 +0100)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 7 Oct 2014 13:19:13 +0000 (13:19 +0000)
Instead of waiting for and checking the Restore and Cancel HSM operations
sequentially, wait for both to finish and then check each
result. This makes it possible to handle the case where the Restore
terminates before the Cancel is started.

Also add to wait_all_done() the ability to consider only the
requests for a specific fid.

Lustre-change: http://review.whamcloud.com/9600
Lustre-commit: adc6a5e76348d9bb1c8355103f419405a14decca

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I480ce074513d6829ecb3078c273bac1fdffa6256
Reviewed-on: http://review.whamcloud.com/9600
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Aurelien Degremont <aurelien.degremont@cea.fr>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
(cherry picked from commit adc6a5e76348d9bb1c8355103f419405a14decca)
Reviewed-on: http://review.whamcloud.com/10712

lustre/tests/sanity-hsm.sh

index 1d2ab49..cda2e5f 100755 (executable)
@@ -664,8 +664,10 @@ get_request_count() {
 
 wait_all_done() {
        local timeout=$1
+       local fid=$2
 
        local cmd="$LCTL get_param -n $HSM_PARAM.actions"
+       [[ -n $fid ]] && cmd+=" | grep '$fid'"
        cmd+=" | egrep 'WAITING|STARTED'"
 
        wait_result $SINGLEMDS "$cmd" "" $timeout ||
@@ -2231,6 +2233,15 @@ test_33() {
        wait_request_state $fid ARCHIVE SUCCEED
        $LFS hsm_release $f
 
+       # to be sure wait_all_done will not be misled by previous tests
+       # and ops.
+       cdt_purge
+       wait_for_grace_delay
+       # Also raise grace_delay significantly so the Canceled
+       # Restore action will stay available long enough.
+       local old_grace=$(get_hsm_param grace_delay)
+       set_hsm_param grace_delay 100
+
        md5sum $f >/dev/null &
        local pid=$!
        wait_request_state $fid RESTORE STARTED
@@ -2243,8 +2254,29 @@ test_33() {
 
        $LFS hsm_cancel $f
 
-       wait_request_state $fid RESTORE CANCELED
-       wait_request_state $fid CANCEL SUCCEED
+       # instead of waiting for and checking the Restore and Cancel ops
+       # sequentially, wait for both to be finished and then check
+       # each result.
+       wait_all_done 100 $fid
+       local rstate=$(get_request_state $fid RESTORE)
+       local cstate=$(get_request_state $fid CANCEL)
+
+       # restore orig grace_delay.
+       set_hsm_param grace_delay $old_grace
+
+       if [[ "$rstate" == "CANCELED" ]] ; then
+               [[ "$cstate" == "SUCCEED" ]] ||
+                       error "Restore state is CANCELED and Cancel state " \
+                              "is not SUCCEED but $cstate"
+               echo "Restore state is CANCELED, Cancel state is SUCCEED"
+       elif [[ "$rstate" == "SUCCEED" ]] ; then
+               [[ "$cstate" == "FAILED" ]] ||
+                       error "Restore state is SUCCEED and Cancel state " \
+                               "is not FAILED but $cstate"
+               echo "Restore state is SUCCEED, Cancel state is FAILED"
+       else
+               error "Restore state is $rstate and Cancel state is $cstate"
+       fi
 
        [ -z $killed ] ||
                error "Cannot kill process waiting for restore ($killed)"