From 97fc8c8caf41e9d74cdb1e373f19c907ed8481b2 Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Fri, 5 Dec 2014 12:23:51 -0800 Subject: [PATCH] LU-5622 tests: check/wait for copytool death It seems that the copytool may take longer to die after being killed than the cleanup currently allows, so the copytool_cleanup() function in sanity-hsm must check and wait for its death; otherwise the next copytool_setup() may only signal the still-running copytool instead of restarting it. This patch is back-ported from the following one: Lustre-commit: 6facf3953b170832200ca9c111398da8feecd281 Lustre-change: http://review.whamcloud.com/11922 Signed-off-by: Bruno Faccini Change-Id: Ia817936eb030386dbe539ec8d5297812f4b6fff2 Reviewed-on: http://review.whamcloud.com/12967 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Nunez Reviewed-by: Henri Doreau Reviewed-by: Oleg Drokin --- lustre/tests/sanity-hsm.sh | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 26161ac..3c8b73d 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -213,7 +213,7 @@ copytool_setup() { if [[ -z "$arc_id" ]] && do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then - echo "Wakeup copytool $facet on $agent" + echo "Only wakeup running copytool $facet on $agent" return 0 fi @@ -270,10 +270,25 @@ copytool_cleanup() { local oldstate local mdt_hsmctrl local hsm_root=$(copytool_device $facet) + local end_wait=$(( SECONDS + TIMEOUT )) do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 - sleep 1 - echo "Copytool is stopped on $agents" + + while (( SECONDS < end_wait )); do + sleep 2 + do_nodesv $agents "pgrep -x $HSMTOOL_BASE" + if [ $? -ne 0 ]; then + echo "Copytool is stopped on $agents" + break + fi + echo "Copytool still running on $agents" + done + if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then + error "Copytool failed to stop in ${TIMEOUT}s ..." + else + echo "Copytool has stopped in " \ + "$((TIMEOUT - (end_wait - SECONDS)))s." 
+ fi # clean all CDTs orphans requests from previous tests # that would otherwise need to timeout to clear. -- 1.8.3.1