Whamcloud - gitweb
LU-5622 tests: check/wait for copytool death 67/12967/2
author: Bruno Faccini <bruno.faccini@intel.com>
Fri, 5 Dec 2014 20:23:51 +0000 (12:23 -0800)
committer: Oleg Drokin <oleg.drokin@intel.com>
Tue, 27 Jan 2015 02:41:26 +0000 (02:41 +0000)
It seems that copytool death/kill may take longer than the fixed
1-second sleep allows, so this condition must be handled in the
sanity-hsm copytool_cleanup() function to avoid situations where the
copytool is not restarted, but only signaled, in the next
copytool_setup().

This patch is back-ported from the following one:
Lustre-commit: 6facf3953b170832200ca9c111398da8feecd281
Lustre-change: http://review.whamcloud.com/11922

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: Ia817936eb030386dbe539ec8d5297812f4b6fff2
Reviewed-on: http://review.whamcloud.com/12967
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Nunez <james.a.nunez@intel.com>
Reviewed-by: Henri Doreau <henri.doreau@cea.fr>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/sanity-hsm.sh

index 26161ac..3c8b73d 100755 (executable)
@@ -213,7 +213,7 @@ copytool_setup() {
 
        if [[ -z "$arc_id" ]] &&
                do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then
-                       echo "Wakeup copytool $facet on $agent"
+                       echo "Only wakeup running copytool $facet on $agent"
                        return 0
        fi
 
@@ -270,10 +270,25 @@ copytool_cleanup() {
        local oldstate
        local mdt_hsmctrl
        local hsm_root=$(copytool_device $facet)
+       local end_wait=$(( SECONDS + TIMEOUT ))
 
        do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
-       sleep 1
-       echo "Copytool is stopped on $agents"
+
+       while (( SECONDS < end_wait )); do
+               sleep 2
+               do_nodesv $agents "pgrep -x $HSMTOOL_BASE"
+               if [ $? -ne 0 ]; then
+                       echo "Copytool is stopped on $agents"
+                       break
+               fi
+               echo "Copytool still running on $agents"
+       done
+       if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then
+               error "Copytool failed to stop in ${TIMEOUT}s ..."
+       else
+               echo "Copytool has stopped in " \
+                    "$((TIMEOUT - (end_wait - SECONDS)))s."
+       fi
 
        # clean all CDTs orphans requests from previous tests
        # that would otherwise need to timeout to clear.