Whamcloud - gitweb
LU-4086 tests: strengthen sanity-hsm/test_33 00/9600/8
authorBruno Faccini <bruno.faccini@intel.com>
Wed, 12 Mar 2014 13:12:33 +0000 (14:12 +0100)
committerOleg Drokin <oleg.drokin@intel.com>
Fri, 28 Mar 2014 04:52:35 +0000 (04:52 +0000)
Instead of waiting+checking both Restore and Cancel HSM operations
sequentially, wait for both to be finished and then check each
result. This allows handling conditions where Restore
terminates before the Cancel is started.

Also added to wait_all_done() the capability to only work on
requests for a specific fid.

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I480ce074513d6829ecb3078c273bac1fdffa6256
Reviewed-on: http://review.whamcloud.com/9600
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Aurelien Degremont <aurelien.degremont@cea.fr>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/sanity-hsm.sh

index 80401fa..54c5e20 100755 (executable)
@@ -658,8 +658,10 @@ get_request_count() {
 
 wait_all_done() {
        local timeout=$1
 
 wait_all_done() {
        local timeout=$1
+       local fid=$2
 
        local cmd="$LCTL get_param -n $HSM_PARAM.actions"
 
        local cmd="$LCTL get_param -n $HSM_PARAM.actions"
+       [[ -n $fid ]] && cmd+=" | grep '$fid'"
        cmd+=" | egrep 'WAITING|STARTED'"
 
        wait_result $SINGLEMDS "$cmd" "" $timeout ||
        cmd+=" | egrep 'WAITING|STARTED'"
 
        wait_result $SINGLEMDS "$cmd" "" $timeout ||
@@ -2225,6 +2227,15 @@ test_33() {
        wait_request_state $fid ARCHIVE SUCCEED
        $LFS hsm_release $f
 
        wait_request_state $fid ARCHIVE SUCCEED
        $LFS hsm_release $f
 
+       # to be sure wait_all_done will not be misled by previous tests
+       # and ops.
+       cdt_purge
+       wait_for_grace_delay
+       # Also raise grace_delay significantly so the Canceled
+       # Restore action will stay available long enough.
+       local old_grace=$(get_hsm_param grace_delay)
+       set_hsm_param grace_delay 100
+
        md5sum $f >/dev/null &
        local pid=$!
        wait_request_state $fid RESTORE STARTED
        md5sum $f >/dev/null &
        local pid=$!
        wait_request_state $fid RESTORE STARTED
@@ -2237,8 +2248,29 @@ test_33() {
 
        $LFS hsm_cancel $f
 
 
        $LFS hsm_cancel $f
 
-       wait_request_state $fid RESTORE CANCELED
-       wait_request_state $fid CANCEL SUCCEED
+       # instead of waiting+checking both Restore and Cancel ops
+       # sequentially, wait for both to be finished and then check
+       # each result.
+       wait_all_done 100 $fid
+       local rstate=$(get_request_state $fid RESTORE)
+       local cstate=$(get_request_state $fid CANCEL)
+
+       # restore orig grace_delay.
+       set_hsm_param grace_delay $old_grace
+
+       if [[ "$rstate" == "CANCELED" ]] ; then
+               [[ "$cstate" == "SUCCEED" ]] ||
+                       error "Restore state is CANCELED and Cancel state " \
+                              "is not SUCCEED but $cstate"
+               echo "Restore state is CANCELED, Cancel state is SUCCEED"
+       elif [[ "$rstate" == "SUCCEED" ]] ; then
+               [[ "$cstate" == "FAILED" ]] ||
+                       error "Restore state is SUCCEED and Cancel state " \
+                               "is not FAILED but $cstate"
+               echo "Restore state is SUCCEED, Cancel state is FAILED"
+       else
+               error "Restore state is $rstate and Cancel state is $cstate"
+       fi
 
        [ -z $killed ] ||
                error "Cannot kill process waiting for restore ($killed)"
 
        [ -z $killed ] ||
                error "Cannot kill process waiting for restore ($killed)"