Whamcloud - gitweb
LU-5622 tests: check/wait for copytool death 22/11922/5
author Bruno Faccini <bruno.faccini@intel.com>
Mon, 15 Sep 2014 15:37:31 +0000 (17:37 +0200)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 4 Dec 2014 02:31:37 +0000 (02:31 +0000)
It seems that copytool death/kill may take some time to complete, so
this condition must be handled in the sanity-hsm copytool_cleanup()
function to avoid situations where the copytool is not restarted,
but only signaled, by the next copytool_setup().

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: Ia72ed07f0219cf0aa2ef5b3805fb1f7faf4dab66
Reviewed-on: http://review.whamcloud.com/11922
Reviewed-by: Henri Doreau <henri.doreau@cea.fr>
Tested-by: Jenkins
Reviewed-by: Robert Read <robert.read@intel.com>
Reviewed-by: James Nunez <james.a.nunez@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/sanity-hsm.sh

index f1e6e52..5ee7105 100755 (executable)
@@ -216,7 +216,7 @@ copytool_setup() {
 
        if [[ -z "$arc_id" ]] &&
                do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then
-                       echo "Wakeup copytool $facet on $agent"
+                       echo "Only wakeup running copytool $facet on $agent"
                        return 0
        fi
 
@@ -273,10 +273,25 @@ copytool_cleanup() {
        local oldstate
        local mdt_hsmctrl
        local hsm_root=$(copytool_device $facet)
+       local end_wait=$(( SECONDS + TIMEOUT ))
 
        do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
-       sleep 1
-       echo "Copytool is stopped on $agents"
+
+       while (( SECONDS < end_wait )); do
+               sleep 2
+               do_nodesv $agents "pgrep -x $HSMTOOL_BASE"
+               if [ $? -ne 0 ]; then
+                       echo "Copytool is stopped on $agents"
+                       break
+               fi
+               echo "Copytool still running on $agents"
+       done
+       if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then
+               error "Copytool failed to stop in ${TIMEOUT}s ..."
+       else
+               echo "Copytool has stopped in " \
+                    "$((TIMEOUT - (end_wait - SECONDS)))s."
+       fi
 
        # clean all CDTs orphans requests from previous tests
        # that would otherwise need to timeout to clear.