From 6facf3953b170832200ca9c111398da8feecd281 Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Mon, 15 Sep 2014 17:37:31 +0200 Subject: [PATCH] LU-5622 tests: check/wait for copytool death The copytool may take longer than expected to die after being killed, so this condition must be handled in the sanity-hsm copytool_cleanup() function, which now checks and waits for the copytool to exit. This avoids situations where the copytool is then not restarted, but only signaled, in the next copytool_setup(). Signed-off-by: Bruno Faccini Change-Id: Ia72ed07f0219cf0aa2ef5b3805fb1f7faf4dab66 Reviewed-on: http://review.whamcloud.com/11922 Reviewed-by: Henri Doreau Tested-by: Jenkins Reviewed-by: Robert Read Reviewed-by: James Nunez Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/tests/sanity-hsm.sh | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index f1e6e52..5ee7105 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -216,7 +216,7 @@ copytool_setup() { if [[ -z "$arc_id" ]] && do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then - echo "Wakeup copytool $facet on $agent" + echo "Only wakeup running copytool $facet on $agent" return 0 fi @@ -273,10 +273,25 @@ copytool_cleanup() { local oldstate local mdt_hsmctrl local hsm_root=$(copytool_device $facet) + local end_wait=$(( SECONDS + TIMEOUT )) do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 - sleep 1 - echo "Copytool is stopped on $agents" + + while (( SECONDS < end_wait )); do + sleep 2 + do_nodesv $agents "pgrep -x $HSMTOOL_BASE" + if [ $? -ne 0 ]; then + echo "Copytool is stopped on $agents" + break + fi + echo "Copytool still running on $agents" + done + if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then + error "Copytool failed to stop in ${TIMEOUT}s ..." + else + echo "Copytool has stopped in " \ + "$((TIMEOUT - (end_wait - SECONDS)))s." + fi # clean all CDTs orphans requests from previous tests # that would otherwise need to timeout to clear. -- 1.8.3.1